Skip to content

Commit

Permalink
Add ColorsTest for testing complex datatype, normalizer and tokenization interaction
Browse files Browse the repository at this point in the history

Completed refactor of IngestTypeVisitor and IngestTypePruningVisitor
  • Loading branch information
apmoriarty committed Apr 17, 2024
1 parent 6730346 commit 820a244
Show file tree
Hide file tree
Showing 12 changed files with 1,753 additions and 164 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
package datawave.query.jexl.visitors;

import java.util.Collections;
import java.util.HashMap;
import static datawave.query.jexl.visitors.IngestTypeVisitor.UNKNOWN_TYPE;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.commons.jexl3.parser.ASTAndNode;
Expand Down Expand Up @@ -49,20 +48,12 @@
public class IngestTypePruningVisitor extends BaseVisitor {
private static final Logger log = Logger.getLogger(IngestTypePruningVisitor.class);

private static final String UNKNOWN_TYPE = "UNKNOWN_TYPE";

// cache expensive calls to get ingest types per field
private final TypeMetadata typeMetadata;
private final Map<String,Set<String>> ingestTypeCache;

private int termsPruned = 0;
private int nodesPruned = 0;

private final IngestTypeVisitor ingestTypeVisitor;

/**
 * Builds a pruning visitor around the supplied type metadata.
 * <p>
 * NOTE(review): this text is a rendered commit diff without +/- markers, so the assignments below appear to mix the pre-change lines (typeMetadata,
 * ingestTypeCache) with the post-change line (ingestTypeVisitor) -- confirm against the commit before treating this as one constructor body.
 *
 * @param typeMetadata
 *            the type metadata; it is also handed to the {@link IngestTypeVisitor} created here
 */
public IngestTypePruningVisitor(TypeMetadata typeMetadata) {
this.typeMetadata = typeMetadata;
this.ingestTypeCache = new HashMap<>();
// delegate used for the ingest type lookups
this.ingestTypeVisitor = new IngestTypeVisitor(typeMetadata);
}

Expand Down Expand Up @@ -165,12 +156,33 @@ public Object visit(ASTReference node, Object data) {

/**
 * Visits a reference expression.
 * <p>
 * NOTE(review): this hunk is a rendered diff without +/- markers. The leading <code>return visitOrPrune(node, data)</code> looks like the removed line and
 * the statements after it look like its replacement -- confirm against the commit.
 *
 * @param node
 *            the reference expression
 * @param data
 *            passed through unchanged to the child visit
 * @return in the replacement body, whatever the visit of the first child returned
 */
@Override
public Object visit(ASTReferenceExpression node, Object data) {
return visitOrPrune(node, data);
// visit the wrapped child with this visitor
Object o = node.jjtGetChild(0).jjtAccept(this, data);
// a reference expression left without children is removed from its own parent
if (node.jjtGetNumChildren() == 0) {
pruneNodeFromParent(node);
}
return o;
}

/**
 * Visits a union (OR) node.
 * <p>
 * NOTE(review): this hunk is a rendered diff without +/- markers. The leading <code>return visitOrPrune(node, data)</code> looks like the removed line and
 * the statements after it look like its replacement -- confirm against the commit.
 * <p>
 * In the replacement body a null <code>data</code> means no pruning is requested and the ingest types for the junction are returned. Otherwise
 * <code>data</code> is cast to the set of pruning types, handed to every child, and returned.
 *
 * @param node
 *            the OR node
 * @param data
 *            null for a plain visit, otherwise a Set of ingest types to prune with
 * @return the junction's ingest types for a plain visit, otherwise the pruning types that were passed in
 */
@Override
public Object visit(ASTOrNode node, Object data) {
return visitOrPrune(node, data);

if (data == null) {
// just a visit
return ingestTypeVisitor.getIngestTypesForJunction(node);
}

// unchecked cast: data is not type-checked before being treated as a Set<String>
Set<String> pruningTypes = (Set<String>) data;

// must traverse the children in reverse order because of pruning
for (int i = node.jjtGetNumChildren() - 1; i >= 0; i--) {
node.jjtGetChild(i).jjtAccept(this, pruningTypes);
}

// an OR node left without children is removed from its own parent
if (node.jjtGetNumChildren() == 0) {
pruneNodeFromParent(node);
}

return pruningTypes;
}

/**
Expand All @@ -188,16 +200,20 @@ public Object visit(ASTAndNode node, Object data) {

QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(node);
if (instance.isAnyType()) {
return visitMarker(instance, node, data);
Object o = visitMarker(instance, node, data);
if (node.jjtGetNumChildren() == 0) {
pruneNodeFromParent(node);
}
return o;
}

// getting ingest types for an intersection is different
Set<String> ingestTypes = getIngestTypesForIntersection(node);
Set<String> ingestTypes = ingestTypeVisitor.getIngestTypesForIntersection(node, true);

// automatically prune if there is no common ingest type
if (ingestTypes.isEmpty()) {
pruneNodeFromParent(node);
return Collections.emptySet();
return new HashSet<>();
}

// the AndNode is where we can generate a set of ingest types used to prune child nodes
Expand Down Expand Up @@ -244,7 +260,7 @@ private Set<String> visitMarker(QueryPropertyMarker.Instance instance, JexlNode
// ExceededOr marker can be handled on its own
if (instance.isType(QueryPropertyMarker.MarkerType.EXCEEDED_OR)) {
String field = new ExceededOr(instance.getSource()).getField();
Set<String> ingestTypes = getIngestTypesForField(field);
Set<String> ingestTypes = ingestTypeVisitor.getIngestTypesForField(field);
if (data instanceof Set<?>) {
return pruneLeaf(ingestTypes, node, data);
}
Expand All @@ -254,7 +270,7 @@ private Set<String> visitMarker(QueryPropertyMarker.Instance instance, JexlNode
JexlNode source = node.jjtGetChild(1);
Set<String> dts = (Set<String>) source.jjtAccept(this, data);

if (source.jjtGetParent() == null) {
if (source.jjtGetParent() == null || source.jjtGetNumChildren() == 0) {
pruneNodeFromParent(node);
}

Expand All @@ -265,7 +281,7 @@ private Set<String> visitMarker(QueryPropertyMarker.Instance instance, JexlNode

private Set<String> visitOrPrune(JexlNode node, Object data) {

Set<String> ingestTypes = getIngestTypes(node);
Set<String> ingestTypes = ingestTypeVisitor.getIngestTypes(node);

// check for pruning
if (data instanceof Set<?>) {
Expand Down Expand Up @@ -296,7 +312,7 @@ private Set<String> pruneLeaf(Set<String> ingestTypes, JexlNode node, Object dat
pruneNodeFromParent(node);
termsPruned++;
}
return Collections.emptySet();
return new HashSet<>();
}

/**
Expand Down Expand Up @@ -324,7 +340,7 @@ private Set<String> pruneJunction(JexlNode node, Object data) {
for (int i = node.jjtGetNumChildren() - 1; i >= 0; i--) {
node.jjtGetChild(i).jjtAccept(this, data);
}
return Collections.emptySet();
return new HashSet<>();
}

/**
Expand All @@ -345,80 +361,6 @@ private boolean isJunction(JexlNode node) {
// @formatter:on
}

// get ingest types

/**
 * Resolves the ingest types for a node, dispatching on whether the node is a junction or a leaf.
 *
 * @param node
 *            the node to inspect
 * @return the ingest types from the junction lookup when {@link #isJunction(JexlNode)} is true, otherwise from the leaf lookup
 */
private Set<String> getIngestTypes(JexlNode node) {
    return isJunction(node) ? getIngestTypesForJunction(node) : getIngestTypesForLeaf(node);
}

/**
 * Gathers the union of the ingest types reported by each child of the given node. Every child is visited by this visitor with a null data argument.
 *
 * @param node
 *            the junction node whose children are visited
 * @return a new set holding every ingest type reported by a child
 */
@SuppressWarnings("unchecked")
public Set<String> getIngestTypesForJunction(JexlNode node) {
    Set<String> union = new HashSet<>();
    int index = 0;
    // the child count is re-read on every pass rather than cached, exactly as a plain indexed for-loop would
    while (index < node.jjtGetNumChildren()) {
        JexlNode child = node.jjtGetChild(index);
        union.addAll((Set<String>) child.jjtAccept(this, null));
        index++;
    }
    return union;
}

/**
 * Determines the ingest types for a leaf node. A leaf usually carries a single field, although a function may yield several fields and, rarely, a leaf may
 * look like <code>FIELD1 == FIELD2</code>.
 * <p>
 * The node is dereferenced first. An equality node for which no literal value can be found is reported as {@link #UNKNOWN_TYPE}; every other node is handed
 * to the {@link IngestTypeVisitor}.
 *
 * @param node
 *            the leaf node
 * @return a set of ingestTypes
 */
public Set<String> getIngestTypesForLeaf(JexlNode node) {
    JexlNode dereferenced = JexlASTHelper.dereference(node);
    // the literal lookup only happens for equality nodes because of the short-circuit
    boolean equalityWithoutLiteral = dereferenced instanceof ASTEQNode && JexlASTHelper.getLiteralValue(dereferenced) == null;
    if (equalityWithoutLiteral) {
        return Collections.singleton(UNKNOWN_TYPE);
    }
    return ingestTypeVisitor.getIngestTypes(dereferenced);
}

/**
 * Looks up the ingest types for a field, answering from the per-field cache when the field has been seen before. On a cache miss the types are fetched from
 * the type metadata; if none are reported, {@link #UNKNOWN_TYPE} is added to that set. The set is then cached and returned.
 *
 * @param field
 *            the field name
 * @return the cached set of ingest types for the field
 */
public Set<String> getIngestTypesForField(String field) {
    if (ingestTypeCache.containsKey(field)) {
        return ingestTypeCache.get(field);
    }

    Set<String> resolved = typeMetadata.getDataTypesForField(field);
    if (resolved.isEmpty()) {
        resolved.add(UNKNOWN_TYPE);
    }
    ingestTypeCache.put(field, resolved);
    return resolved;
}

/**
 * Computes the ingest types common to the children of an intersection (AND) node. Each child is visited by this visitor with a null data argument. While
 * the running result is empty it is replaced by the child's types; otherwise it is combined with them via
 * {@link #intersectTypes(Set, Set)}.
 *
 * @param node
 *            the AND node
 * @return the running result once it becomes empty, or after the last child has been visited
 */
@SuppressWarnings("unchecked")
private Set<String> getIngestTypesForIntersection(ASTAndNode node) {
    Set<String> common = new HashSet<>();
    for (int index = 0; index < node.jjtGetNumChildren(); index++) {
        Set<String> childTypes = (Set<String>) node.jjtGetChild(index).jjtAccept(this, null);

        if (common.isEmpty()) {
            common = childTypes;
        } else {
            common = intersectTypes(common, childTypes);
        }

        if (common.isEmpty()) {
            // short circuit. nothing left to intersect, so the remaining children are not visited.
            return common;
        }
    }
    return common;
}

/**
 * Intersects two sets of ingest types. If either set contains {@link #UNKNOWN_TYPE} the result is a singleton holding only {@link #UNKNOWN_TYPE};
 * otherwise the result is whatever <code>Sets.intersection</code> returns for the two sets.
 *
 * @param typesA
 *            the first set of ingest types
 * @param typesB
 *            the second set of ingest types
 * @return the unknown-type singleton, or the intersection of the two sets
 */
private Set<String> intersectTypes(Set<String> typesA, Set<String> typesB) {
    boolean bothKnown = !typesA.contains(UNKNOWN_TYPE) && !typesB.contains(UNKNOWN_TYPE);
    return bothKnown ? Sets.intersection(typesA, typesB) : Collections.singleton(UNKNOWN_TYPE);
}

private void pruneNodeFromParent(JexlNode node) {
JexlNodes.removeFromParent(node.jjtGetParent(), node);
nodesPruned++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public static Set<String> getIngestTypes(JexlNode node, TypeMetadata typeMetadat
return ingestTypes;
}
}
return Collections.emptySet();
return new HashSet<>();
}

/**
Expand Down Expand Up @@ -196,7 +196,7 @@ public Object visit(ASTAndNode node, Object data) {
}

// getting ingest types for an intersection is different
return getIngestTypesForIntersection(node);
return getIngestTypesForIntersection(node, false);
}

/**
Expand Down Expand Up @@ -265,14 +265,14 @@ public Set<String> getIngestTypesForJunction(JexlNode node) {
*/
public Set<String> getIngestTypesForLeaf(JexlNode node) {
node = JexlASTHelper.dereference(node);
if (node instanceof ASTEQNode) {
Object literal = JexlASTHelper.getLiteralValueSafely(node);
if (literal == null) {
return Collections.singleton(UNKNOWN_TYPE);
}
}

Set<String> ingestTypes = new HashSet<>();
if ((node instanceof ASTEQNode || node instanceof ASTNENode) && JexlASTHelper.getLiteralValueSafely(node) == null) {
// terms like (FIELD == null) or (FIELD != null) should be pruned in the future, but for now do nothing.
ingestTypes.add(UNKNOWN_TYPE);
return ingestTypes;
}

Set<String> fields = getFieldsForLeaf(node);
for (String field : fields) {
ingestTypes.addAll(getIngestTypesForField(field));
Expand All @@ -299,7 +299,7 @@ public Set<String> getFieldsForLeaf(JexlNode node) {
} catch (Exception e) {
// if a FunctionsDescriptor throws an exception for any reason then return an empty collection
// so the node gets treated as an unknown type
return Collections.emptySet();
return new HashSet<>();
}
}

Expand Down Expand Up @@ -338,7 +338,7 @@ private Set<String> getFieldsForFunctionNode(ASTFunctionNode node) {
default:
// do nothing
log.warn("Unhandled function namespace: " + visitor.namespace());
return Collections.emptySet();
return new HashSet<>();
}
}

Expand All @@ -354,14 +354,21 @@ public Set<String> getIngestTypesForField(String field) {
}

@SuppressWarnings("unchecked")
private Set<String> getIngestTypesForIntersection(ASTAndNode node) {
public Set<String> getIngestTypesForIntersection(ASTAndNode node, boolean pruning) {
Set<String> ingestTypes = new HashSet<>();
for (int i = 0; i < node.jjtGetNumChildren(); i++) {
JexlNode child = JexlASTHelper.dereference(node.jjtGetChild(i));

if (pruning && child instanceof ASTNotNode) {
continue;
}

Set<String> childIngestTypes = (Set<String>) child.jjtAccept(this, null);

if (childIngestTypes == null) {
continue; // we could have a malformed query or a query with a _Drop_ marker
if (childIngestTypes == null || (ingestTypes.isEmpty() && childIngestTypes.contains(UNKNOWN_TYPE))) {
// we could have a malformed query or a query with a _Drop_ marker
// or, the first term could be UNKNOWN
continue;
}

if (ingestTypes.isEmpty()) {
Expand All @@ -385,7 +392,9 @@ private Set<String> getIngestTypesForIntersection(ASTAndNode node) {

private Set<String> intersectTypes(Set<String> typesA, Set<String> typesB) {
if (typesA.contains(UNKNOWN_TYPE) || typesB.contains(UNKNOWN_TYPE)) {
return Collections.singleton(UNKNOWN_TYPE);
Set<String> unknown = new HashSet<>();
unknown.add(UNKNOWN_TYPE);
return unknown;
}
typesA.retainAll(typesB);
return typesA;
Expand Down

0 comments on commit 820a244

Please sign in to comment.