Skip to content

Commit

Permalink
Allow redundant dots in JSON paths accepted by json_extract[_scalar] …
Browse files Browse the repository at this point in the history
…Presto (#9585)

Summary:
Pull Request resolved: #9585

json_extract_scalar and json_extract Presto function allow paths like '$.[0].
[1].[2].foo' that include "redundant" dots before opening brackets. These paths
are non-standard. They are enabled via use of Jayway engine.

This change allows Velox to support these paths by ignoring dots that appear
before opening brackets.

See prestodb/presto#22589 Part of
#7049

Reviewed By: xiaoxmeng

Differential Revision: D56467690

fbshipit-source-id: 42866815ff186510679a0ab877e2796d324309c7
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Apr 23, 2024
1 parent 7e9550e commit 2a67b6e
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 1 deletion.
6 changes: 5 additions & 1 deletion velox/functions/prestosql/json/JsonPathTokenizer.cpp
Expand Up @@ -85,7 +85,11 @@ std::optional<std::string> JsonPathTokenizer::getNext() {
}

if (match(DOT)) {
return matchDotKey();
if (hasNext() && path_[index_] != OPEN_BRACKET) {
return matchDotKey();
}
// Simply ignore the '.' followed by '['. This allows non-standard paths
// like '$.[0].[1].[2]' supported by Jayway / Presto.
}

if (match(OPEN_BRACKET)) {
Expand Down
8 changes: 8 additions & 0 deletions velox/functions/prestosql/json/JsonPathTokenizer.h
Expand Up @@ -38,9 +38,17 @@ namespace facebook::velox::functions {
/// equivalent. This is not part of JSONPath syntax, but this is the behavior of
/// Jayway implementation used by Presto.
///
/// It is allowed to use dot-notation redundantly, e.g. non-standard path
/// '$.[0].foo' is allowed and is equivalent to '$[0].foo'. Similarly, paths
/// '$.[0].[1].[2]' and '$[0].[1][2]' are allowed and equivalent to
/// '$[0][1][2]'.
///
/// Examples:
/// "$"
/// "$.store.book[0].author"
/// "store.book[0].author"
/// "store.book.[0].author"
/// "$[0].foo.bar"
class JsonPathTokenizer {
public:
/// Resets the tokenizer to a new path. This method must be called and return
Expand Down
Expand Up @@ -95,6 +95,14 @@ TEST(JsonPathTokenizerTest, validPaths) {
assertValidPath("foo[12].bar", TokenList({"foo", "12", "bar"}));
assertValidPath("foo.bar.baz", TokenList({"foo", "bar", "baz"}));

// Paths with redundant '.'s.
assertValidPath("$.[0].[1].[2]", TokenList({"0", "1", "2"}));
assertValidPath("$[0].[1].[2]", TokenList({"0", "1", "2"}));
assertValidPath("$[0].[1][2]", TokenList({"0", "1", "2"}));
assertValidPath("$.[0].[1].foo.[2]", TokenList({"0", "1", "foo", "2"}));
assertValidPath("$[0].[1].foo.[2]", TokenList({"0", "1", "foo", "2"}));
assertValidPath("$[0][1].foo.[2]", TokenList({"0", "1", "foo", "2"}));

assertQuotedToken("!@#$%^&*()[]{}/?'"s, TokenList{"!@#$%^&*()[]{}/?'"s});
assertQuotedToken("ab\u0001c"s, TokenList{"ab\u0001c"s});
assertQuotedToken("ab\0c"s, TokenList{"ab\0c"s});
Expand Down
7 changes: 7 additions & 0 deletions velox/functions/prestosql/tests/JsonExtractScalarTest.cpp
Expand Up @@ -108,6 +108,13 @@ TEST_F(JsonExtractScalarTest, simple) {
// Paths without leading '$'.
EXPECT_EQ(jsonExtractScalar(R"({"k1":"v1"})", "k1"), "v1");
EXPECT_EQ(jsonExtractScalar(R"({"k1":{"k2": 999}})", "k1.k2"), "999");

// Paths with redundant '.'s.
auto json = "[1, 2, [10, 20, [100, 200, 300]]]";
EXPECT_EQ(jsonExtractScalar(json, "$[2][2][2]"), "300");
EXPECT_EQ(jsonExtractScalar(json, "$.[2].[2].[2]"), "300");
EXPECT_EQ(jsonExtractScalar(json, "$[2].[2].[2]"), "300");
EXPECT_EQ(jsonExtractScalar(json, "$[2][2].[2]"), "300");
}

TEST_F(JsonExtractScalarTest, jsonType) {
Expand Down
8 changes: 8 additions & 0 deletions velox/functions/prestosql/tests/JsonFunctionsTest.cpp
Expand Up @@ -610,6 +610,14 @@ TEST_F(JsonFunctionsTest, jsonExtract) {
EXPECT_EQ(std::nullopt, jsonExtract(json, "x.c"));
EXPECT_EQ(std::nullopt, jsonExtract(json, "x.b[20]"));

// Paths with redundant '.'s.
json = R"([[[{"a": 1, "b": [1, 2, 3]}]]])";
EXPECT_EQ("1", jsonExtract(json, "$.[0][0][0].a"));
EXPECT_EQ("[1, 2, 3]", jsonExtract(json, "$.[0].[0].[0].b"));
EXPECT_EQ("[1, 2, 3]", jsonExtract(json, "$[0][0].[0].b"));
EXPECT_EQ("3", jsonExtract(json, "$[0][0][0].b.[2]"));
EXPECT_EQ("3", jsonExtract(json, "$.[0].[0][0].b.[2]"));

// TODO The following paths are supported by Presto via Jayway, but do not
// work in Velox yet. Figure out how to add support for these.
VELOX_ASSERT_THROW(jsonExtract(kJson, "$..price"), "Invalid JSON path");
Expand Down

0 comments on commit 2a67b6e

Please sign in to comment.