Skip to content

Commit

Permalink
restore ability to perform an internal prune of a negated term
Browse files Browse the repository at this point in the history
  • Loading branch information
apmoriarty committed May 2, 2024
1 parent 9a7fe0d commit 815e48c
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ public Object visit(ASTJexlScript node, Object data) {

@Override
public Object visit(ASTNotNode node, Object data) {
return visitOrPrune(node, data);
Object o = node.jjtGetChild(0).jjtAccept(this, data);
if (node.jjtGetNumChildren() == 0) {
pruneNodeFromParent(node);
}
return o;
}

@Override
Expand All @@ -151,7 +155,11 @@ public Object visit(ASTFunctionNode node, Object data) {

@Override
public Object visit(ASTReference node, Object data) {
return visitOrPrune(node, data);
Object o = node.jjtGetChild(0).jjtAccept(this, data);
if (node.jjtGetNumChildren() == 0) {
pruneNodeFromParent(node);
}
return o;
}

@Override
Expand All @@ -164,25 +172,34 @@ public Object visit(ASTReferenceExpression node, Object data) {
}

@Override
@SuppressWarnings("unchecked")
public Object visit(ASTOrNode node, Object data) {

Set<String> types;
if (data == null) {
// just a visit
return ingestTypeVisitor.getIngestTypesForJunction(node);
}

Set<String> pruningTypes = (Set<String>) data;

// must traverse the children in reverse order because of pruning
for (int i = node.jjtGetNumChildren() - 1; i >= 0; i--) {
node.jjtGetChild(i).jjtAccept(this, pruningTypes);
// normal visit
types = new HashSet<>();
// must traverse the children in reverse order because of pruning
for (int i = node.jjtGetNumChildren() - 1; i >= 0; i--) {
Set<String> childTypes = (Set<String>) node.jjtGetChild(i).jjtAccept(this, data);
types.addAll(childTypes);
}
} else {
// pruning visit
Set<String> pruningTypes = (Set<String>) data;
// must traverse the children in reverse order because of pruning
for (int i = node.jjtGetNumChildren() - 1; i >= 0; i--) {
node.jjtGetChild(i).jjtAccept(this, pruningTypes);
}
types = pruningTypes;
}

// all children could self-prune, for example (A && B) || (C && D) when no term maps to the same datatype
if (node.jjtGetNumChildren() == 0) {
pruneNodeFromParent(node);
}

return pruningTypes;
return types;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import static datawave.query.jexl.functions.EvaluationPhaseFilterFunctions.EVAL_PHASE_FUNCTION_NAMESPACE;
import static datawave.query.jexl.functions.GroupingRequiredFilterFunctions.GROUPING_REQUIRED_FUNCTION_NAMESPACE;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -53,6 +52,8 @@
* The full set of ingest types is {1, 2, 3}, but the <b>effective set</b> is just ingest type 1.
* <p>
* Much of this code originated from the {@link IngestTypePruningVisitor}.
* <p>
* Note: IngestType and Datatype are used interchangeably here. Datatype does not refer to a data type such as {@link datawave.data.type.LcType}.
*/
public class IngestTypeVisitor extends BaseVisitor {

Expand Down Expand Up @@ -84,6 +85,7 @@ public static Set<String> getIngestTypes(JexlNode node, TypeMetadata typeMetadat
if (o instanceof Set) {
Set<String> ingestTypes = (Set<String>) o;
if (ingestTypes.contains(UNKNOWN_TYPE)) {
// return just the UNKNOWN_TYPE
ingestTypes.retainAll(Collections.singleton(UNKNOWN_TYPE));
return ingestTypes;
}
Expand Down Expand Up @@ -310,6 +312,13 @@ public Set<String> getFieldsForLeaf(JexlNode node) {
// @formatter:on
}

/**
* Use the functions descriptor when getting fields for an {@link ASTFunctionNode}.
*
* @param node
* a function node
* @return the function fields
*/
private Set<String> getFieldsForFunctionNode(ASTFunctionNode node) {
FunctionJexlNodeVisitor visitor = FunctionJexlNodeVisitor.eval(node);
switch (visitor.namespace()) {
Expand Down Expand Up @@ -341,6 +350,13 @@ private Set<String> getFieldsForFunctionNode(ASTFunctionNode node) {
}
}

/**
* Wrapper around {@link TypeMetadata#getDataTypesForField(String)} that supports caching the results of a potentially expensive call.
*
* @param field
* the query field
* @return the ingest types associated with the provided field
*/
public Set<String> getIngestTypesForField(String field) {
if (!ingestTypeCache.containsKey(field)) {
Set<String> types = typeMetadata.getDataTypesForField(field);
Expand All @@ -352,6 +368,26 @@ public Set<String> getIngestTypesForField(String field) {
return ingestTypeCache.get(field);
}

/**
* Get the effective ingest types for an intersection. This is not as simple as it first appears.
* <p>
* Consider the following queries where field A maps to datatype 1 and field B maps to datatype 2:
* <p>
* <code>A == '1' &amp;&amp; !(B == '2')</code>
* </p>
* <p>
* <code>A == '1' &amp;&amp; B == null</code>
* </p>
* <p>
* Both queries appear to be non-executable due to exclusive datatypes. A normal intersection of the A and B terms should produce an empty set. However,
* the A term is executable while in both cases the B term acts as a filter. The B term is always true by definition of being an exclusive datatype, so this
* visitor will return ingest type 1 for this intersection. The IngestTypePruningVisitor will correctly detect that the B term is prunable and remove it
* from the query.
*
* @param node
* an AndNode
* @return the effective ingest types for this intersection
*/
@SuppressWarnings("unchecked")
public Set<String> getIngestTypesForIntersection(ASTAndNode node) {
Set<String> ingestTypes = new HashSet<>();
Expand Down Expand Up @@ -385,6 +421,15 @@ public Set<String> getIngestTypesForIntersection(ASTAndNode node) {
return ingestTypes;
}

/**
* If either side of the intersection contains an UNKNOWN_TYPE we must persist that.
*
* @param typesA
* types for left side
* @param typesB
* types for right side
* @return the intersection of two sets of types, with special handling if an UNKNOWN type is present on either side.
*/
private Set<String> intersectTypes(Set<String> typesA, Set<String> typesB) {
if (typesA.contains(UNKNOWN_TYPE) || typesB.contains(UNKNOWN_TYPE)) {
Set<String> unknown = new HashSet<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ public void testNotNulls() throws Exception {
"filter:isNotNull(UUID) || filter:isNotNull(NULL1)",
"filter:isNotNull(ONE_NULL)",
"UUID =~ '^[CS].*' AND filter:isNotNull(UUID)",
"UUID =~ '^[CS].*' AND filter:isNotNull(NULL1)", // dis guy
"UUID =~ '^[CS].*' AND filter:isNotNull(NULL1)",
"UUID =~ '^[CS].*' AND filter:isNotNull(NULL1||NULL2)",
"UUID =~ '^[CS].*' AND filter:isNotNull(BOTH_NULL)",
"UUID =~ '^[CS].*' AND filter:isNotNull(UUID||NULL1)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,8 +440,12 @@ public void testEvaluationOnlyField() {

@Test
public void testPruneNegation() {
String query = "A == '1' || !((_Delayed_ = true) && (A == '1' && C == '2'))";
test(query, query);
// internal prune
String query = "A == '1' || !((_Delayed_ = true) && (A == '2' && C == '3'))";
test(query, "A == '1'");

query = "A == '0' && (A == '1' || !((_Delayed_ = true) && (A == '2' && C == '3')))";
test(query, "A == '0' && (A == '1')");
}

@Test
Expand Down Expand Up @@ -622,6 +626,18 @@ public void testFilterFunctionExcludeExpandedIntoMutuallyExclusiveFields() {
test(query, expected, metadata);
}

/**
 * A union made up solely of negated equality terms is expected to be left unchanged: the expected query is the input query.
 */
@Test
public void testUnionOfNegatedTerms() {
    final String negatedUnion = "!(A == '1') || !(B == '2') || !(C == '3')";
    test(negatedUnion, negatedUnion);
}

/**
 * A union made up solely of negated null-equality terms is expected to be left unchanged: the expected query is the input query.
 */
@Test
public void testUnionOfNotNullTerms() {
    final String notNullUnion = "!(A == null) || !(B == null) || !(C == null)";
    test(notNullUnion, notNullUnion);
}

/**
 * Convenience overload that delegates to the four-argument {@code test}, supplying the {@code typeMetadata} in scope and {@code null} as the final
 * argument.
 *
 * @param query
 *            the input query
 * @param expected
 *            the expected query
 */
private void test(String query, String expected) {
test(query, expected, typeMetadata, null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,18 @@ void testFilterFunctionExcludeExpandedIntoMutuallyExclusiveFields() {
test(query, Collections.singleton("type1"), metadata);
}

/**
 * For a union of negated equality terms on fields A, B and C, the expected ingest types are the union of {@code aTypes} and {@code bTypes}.
 */
@Test
void testUnionOfNegatedTerms() {
    final String negatedUnion = "!(A == '1') || !(B == '2') || !(C == '3')";
    test(negatedUnion, Sets.union(aTypes, bTypes));
}

/**
 * For a union of negated null-equality terms on fields A, B and C, the expected ingest types are the union of {@code aTypes} and {@code bTypes}.
 */
@Test
void testUnionOfNotNullTerms() {
    final String notNullUnion = "!(A == null) || !(B == null) || !(C == null)";
    test(notNullUnion, Sets.union(aTypes, bTypes));
}

/**
 * Convenience overload that delegates to the three-argument {@code assertSingleNode}, supplying the {@code typeMetadata} in scope.
 *
 * @param query
 *            the query
 * @param expectedIngestTypes
 *            the ingest types expected for the query
 */
private void assertSingleNode(String query, Set<String> expectedIngestTypes) {
assertSingleNode(query, expectedIngestTypes, typeMetadata);
}
Expand Down

0 comments on commit 815e48c

Please sign in to comment.