Use the inverted reverse query model mappings

apmoriarty committed Apr 1, 2024
1 parent e4448b4 commit abc879e
Showing 10 changed files with 200 additions and 20 deletions.
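The heart of this commit is in AttributeRebuilder: rather than consulting the QueryModel's forward mappings on every lookup, the rebuilder now inverts the model's reverse query mappings once in its constructor and resolves fields through the resulting map. Below is a minimal, self-contained sketch of that inversion, assuming the AG/AGE/ETA model described in the tests further down; the class and variable names are illustrative only and do not appear in the commit.

import java.util.HashMap;
import java.util.Map;

public class InvertedReverseMappingSketch {
    public static void main(String[] args) {
        // reverse model mappings: database field -> model field
        // (mirrors the test model: rev AGE = AG, rev ETA = AG)
        Map<String,String> reverseMapping = new HashMap<>();
        reverseMapping.put("AGE", "AG");
        reverseMapping.put("ETA", "AG");

        // invert them, model field -> database field, as invertMap() below does
        Map<String,String> fieldMap = new HashMap<>();
        for (Map.Entry<String,String> entry : reverseMapping.entrySet()) {
            fieldMap.put(entry.getValue(), entry.getKey());
        }

        // the rebuilder can now resolve the model field AG to a concrete
        // database alias and look up normalizer names for that alias
        System.out.println(fieldMap.get("AG")); // prints AGE or ETA
    }
}

Note that with a plain HashMap, a model field backed by several aliases (AG here) resolves to whichever alias the iteration visits last; that is sufficient when, as in the test model, the aliases share the same normalizers.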
Changed file 1 of 10

@@ -2387,6 +2387,12 @@ public static void configureTypeMappings(ShardQueryConfiguration config, Iterato

if (config.getReduceTypeMetadata() && !isPreload) {
Set<String> fieldsToRetain = ReduceFields.getQueryFields(config.getQueryTree());
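// retain projection fields and both halves of each composite mapping so the reduction below does not strip them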
fieldsToRetain.addAll(config.getProjectFields());
for (Entry<String,String> entry : config.getCompositeToFieldMap().entries()) {
fieldsToRetain.add(entry.getKey());
fieldsToRetain.add(entry.getValue());
}

typeMetadata = typeMetadata.reduce(fieldsToRetain);
}

Changed file 2 of 10

@@ -369,6 +369,7 @@ public void initialize(ShardQueryConfiguration config, AccumuloClient client, Qu
config.setAuthorizations(auths);
config.setMaxScannerBatchSize(getMaxScannerBatchSize());
config.setMaxIndexBatchSize(getMaxIndexBatchSize());
setConfig(config);

setScannerFactory(new ScannerFactory(config));

@@ -673,9 +674,18 @@ private void addConfigBasedTransformers() {
MetadataHelperFactory factory = getMetadataHelperFactory();
MetadataHelper helper = factory.createMetadataHelper(getConfig().getClient(), getMetadataTableName(), getConfig().getAuthorizations());
TypeMetadata typeMetadata = helper.getTypeMetadata(getConfig().getDatatypeFilter());

if (config.getQueryModel() == null) {
if (queryModel != null) {
config.setQueryModel(queryModel);
} else {
loadQueryModel(helper, config);
}
}

AttributeRebuilder rebuilder = new AttributeRebuilder(typeMetadata, getQueryModel());
((DocumentTransformer) this.transformerInstance).setAttributeRebuilder(rebuilder);
-} catch (TableNotFoundException e) {
+} catch (TableNotFoundException | InstantiationException | IllegalAccessException | ExecutionException e) {
log.error("could not build type metadata for responses, disabling type metadata reduction");
setReduceTypeMetadata(false);
setReduceTypeMetadataPerShard(false);
Changed file 3 of 10

@@ -31,11 +31,14 @@

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

import datawave.core.iterators.filesystem.FileSystemCache;
import datawave.query.composite.CompositeMetadata;
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.exceptions.DatawaveFatalQueryException;
import datawave.query.exceptions.InvalidQueryException;
@@ -459,6 +462,23 @@ private void reduceTypeMetadata(ASTJexlScript script, IteratorSetting newIterato
TypeMetadata typeMetadata = new TypeMetadata(serializedTypeMetadata);

Set<String> fieldsToRetain = ReduceFields.getQueryFields(script);

// add projection fields
if (newIteratorSetting.getOptions().containsKey(QueryOptions.PROJECTION_FIELDS)) {
String opt = newIteratorSetting.getOptions().get(QueryOptions.PROJECTION_FIELDS);
fieldsToRetain.addAll(Splitter.on(',').splitToList(opt));
}

// add composite fields
if (newIteratorSetting.getOptions().containsKey(QueryOptions.COMPOSITE_METADATA)) {
String opt = newIteratorSetting.getOptions().get(QueryOptions.COMPOSITE_METADATA);
CompositeMetadata compositeMetadata = CompositeMetadata.fromBytes(java.util.Base64.getDecoder().decode(opt));
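// retain each composite field and its component fields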
for (Multimap<String,String> multimap : compositeMetadata.getCompositeFieldMapByType().values()) {
fieldsToRetain.addAll(multimap.keySet());
fieldsToRetain.addAll(multimap.values());
}
}

typeMetadata = typeMetadata.reduce(fieldsToRetain);

serializedTypeMetadata = typeMetadata.toString();
Changed file 4 of 10

@@ -1,6 +1,5 @@
package datawave.query.util.transformer;

-import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@@ -9,8 +8,6 @@

import org.apache.log4j.Logger;

-import com.google.common.collect.Multimap;
-
import datawave.data.type.Type;
import datawave.query.attributes.Attribute;
import datawave.query.attributes.TypeAttribute;
@@ -24,7 +21,7 @@
public class AttributeRebuilder {
private static final Logger log = Logger.getLogger(AttributeRebuilder.class);
private final TypeMetadata typeMetadata;
-private final QueryModel queryModel;
+private final Map<String,String> fieldMap;
private final Map<String,Class<?>> classCache;

/**
@@ -35,10 +32,22 @@ public class AttributeRebuilder {
*/
public AttributeRebuilder(TypeMetadata typeMetadata, @Nullable QueryModel queryModel) {
this.typeMetadata = typeMetadata;
-this.queryModel = queryModel;
+if (queryModel == null) {
+this.fieldMap = new HashMap<>();
+} else {
+this.fieldMap = invertMap(queryModel.getReverseQueryMapping());
+}
this.classCache = new HashMap<>();
}

private Map<String,String> invertMap(Map<String,String> map) {
Map<String,String> mappings = new HashMap<>();
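// note: with plain put(), when several keys map to the same value, the last entry processed wins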
for (Map.Entry<String,String> entry : map.entrySet()) {
mappings.put(entry.getValue(), entry.getKey());
}
return mappings;
}

/**
* Given a field and an attribute, return the correctly typed attribute
*
@@ -88,20 +97,12 @@ private void populateNormalizerFromQueryModel(String field, Set<String> normaliz
log.trace("Field " + field + " not found in TypeMetadata, falling back to QueryModel");
}

-if (queryModel == null) {
-log.warn("QueryModel is null, cannot populate normalizers for " + field + " from model");
-return;
+String alias = fieldMap.get(field);
+if (alias == null) {
+log.error("Field " + field + " did not have a reverse mapping in the query model");
}

-// check forward mappings
-Multimap<String,String> forwardMappings = queryModel.getForwardQueryMapping();
-
-if (forwardMappings.keySet().contains(field)) {
-Collection<String> values = forwardMappings.get(field);
-for (String value : values) {
-normalizerNames.addAll(typeMetadata.getNormalizerNamesForField(value));
-}
-}
+normalizerNames.addAll(typeMetadata.getNormalizerNamesForField(alias));
}

/**
Changed file 5 of 10

@@ -198,6 +198,10 @@ protected void runTestQuery(List<String> expected, String plan, String querystr,
log.debug("logic: " + settings.getQueryLogicName());
logic.setMaxEvaluationPipelines(1);
logic.setFullTableScanEnabled(true);
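// run with all type metadata and ingest type reductions disabled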
logic.setReduceTypeMetadata(false);
logic.setReduceTypeMetadataPerShard(false);
logic.setReduceIngestTypes(false);
logic.setReduceIngestTypesPerShard(false);

GenericQueryConfiguration config = logic.initialize(client, settings, authSet);
logic.setupQuery(config);
Changed file 6 of 10

@@ -92,7 +92,7 @@ protected void runQuery(String query, Map<String,String> parameters, Set<String>

@Override
protected void runQuery(String query, Map<String,String> parameters, Set<String> expected, boolean reduceTypeMetadata) throws Exception {
-super.runQuery(query, parameters, expected, false, client);
+super.runQuery(query, parameters, expected, reduceTypeMetadata, client);
}
}

@@ -321,6 +321,44 @@ public void testSopranoDecimalTypes() throws Exception {
runQuery(queryString, extraParameters, expected, true);
}

@Test
public void testSopranoDecimalTypesWithModel() throws Exception {
// relevant model info
// fwd: AG = [AGE, ETA]
// rev: AGE = AG
// rev: ETA = AG
String queryString = "UUID == 'soprano' && AGE == 16";

Map<String,String> extraParameters = new HashMap<>();
extraParameters.put("include.grouping.context", "true");
extraParameters.put("return.fields", "AGE");

Set<String> expected = new HashSet<>();
expected.add(WiseGuysIngest.sopranoUID);

// query field AGE and return field AGE
runQuery(queryString, extraParameters, expected);
runQuery(queryString, extraParameters, expected, true);

// query field AGE and return field AG
queryString = "UUID == 'soprano' && AGE == 16";
extraParameters.put("return.fields", "AG");
runQuery(queryString, extraParameters, expected);
runQuery(queryString, extraParameters, expected, true);

// query field AG and return field AGE
queryString = "UUID == 'soprano' && AG == 16";
extraParameters.put("return.fields", "AGE");
runQuery(queryString, extraParameters, expected);
runQuery(queryString, extraParameters, expected, true);

// query field AG and return field AG
queryString = "UUID == 'soprano' && AG == 16";
extraParameters.put("return.fields", "AG");
runQuery(queryString, extraParameters, expected);
runQuery(queryString, extraParameters, expected, true);
}

@Test
public void testSopranoStringTypes() throws Exception {
String queryString = "UUID == 'soprano'";
Changed file 7 of 10

@@ -0,0 +1,96 @@
package datawave.query.jexl.visitors;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.Collections;
import java.util.Set;

import org.apache.commons.jexl3.parser.ParseException;
import org.junit.jupiter.api.Test;

import com.google.common.collect.Sets;

class VariableNameVisitorTest {

@Test
void testLeaves() {
// @formatter:off
Object[][] queries = new Object[][] {
{"F == null", Collections.singleton("F")},
{"F == '1'", Collections.singleton("F")},
{"F != '1'", Collections.singleton("F")},
{"F > '1'", Collections.singleton("F")},
{"F < '1'", Collections.singleton("F")},
{"F >= '1'", Collections.singleton("F")},
{"F <= '1'", Collections.singleton("F")}
};
// @formatter:on

test(queries);
}

@Test
void testJunctions() {
// @formatter:off
Object[][] queries = new Object[][] {
// simple junctions
{"F1 == '1' || F2 == '2'", Sets.newHashSet("F1", "F2")},
{"F1 == '1' && F2 == '2'", Sets.newHashSet("F1", "F2")},
// single nested junction
{"F1 == '1' || (F2 == '2' && F3 == '3')", Sets.newHashSet("F1", "F2", "F3")},
{"F1 == '1' && (F2 == '2' || F3 == '3')", Sets.newHashSet("F1", "F2", "F3")},
// double nested junctions
{"(F1 == '1' && F2 == '2') || (F3 == '3' || F4 == '4')", Sets.newHashSet("F1", "F2", "F3", "F4")},
{"(F1 == '1' || F2 == '2') && (F3 == '3' && F4 == '4')", Sets.newHashSet("F1", "F2", "F3", "F4")}
};
// @formatter:on

test(queries);
}

@Test
void testMarkers() {
// @formatter:off
Object[][] queries = new Object[][] {
{"((_Bounded_ = true) && (F > '2' && F < '5'))", Sets.newHashSet("F", "_Bounded_")},
{"((_Delayed_ = true) && (F == '1'))", Sets.newHashSet("F", "_Delayed_")},
{"((_Eval_ = true) && (F == '1'))", Sets.newHashSet("F", "_Eval_")},
{"((_List_ = true) && ((id = 'id') && (field = 'F') && (params = '{\"ranges\":[[\"[r1\",\"r2]\"],[\"[r3\",\"f4]\"]]}')))", Collections.singleton("F")},
{"((_Value_ = true) && (F =~ 'ba.*'))", Sets.newHashSet("F", "_Value_")},
{"((_Term_ = true) && (_ANYFIELD_ =~ 'ba.*'))", Sets.newHashSet("_ANYFIELD_", "_Term_")},
{"((_Hole_ = true) && (F == '1'))", Sets.newHashSet("F", "_Hole_")},
{"((_Drop_ = true) && (F == '1'))", Sets.newHashSet("F", "_Drop_")},
{"((_Lenient_ = true) && (F == '1'))", Sets.newHashSet("F", "_Lenient_")},
{"((_Strict_ = true) && (F == '1'))", Sets.newHashSet("F", "_Strict_")}
};
// @formatter:on

test(queries);
}

@Test
void testQueryWithDroppedNodes() {
// document this use case
String query = "(((_Drop_ = true) && ((_Query_ = 'AGE > \\'abc10\\'') && (_Reason_ = 'Normalizations failed and not strict'))) || ((_Drop_ = true) && ((_Query_ = 'ETA > \\'abc10\\'') && (_Reason_ = 'Normalizations failed and not strict'))))";
Set<String> expected = Sets.newHashSet("_Drop_", "_Query_", "_Reason_");
test(query, expected);
}

@SuppressWarnings("unchecked")
private void test(Object[][] queries) {
for (Object[] query : queries) {
test((String) query[0], (Set<String>) query[1]);
}
}

private void test(String query, Set<String> expected) {
try {
Set<String> names = VariableNameVisitor.parseQuery(query);
assertEquals(expected, names);
} catch (ParseException e) {
fail("Failed to parse: " + query);
throw new IllegalArgumentException("Failed to parse query: " + query);
}
}
}
Changed file 8 of 10

@@ -38,6 +38,9 @@ public static void setup() {
queryModel.addTermToModel("X", "A");
queryModel.addTermToModel("Y", "B");
queryModel.addTermToModel("Z", "C");
queryModel.addTermToReverseModel("A", "X");
queryModel.addTermToReverseModel("B", "Y");
queryModel.addTermToReverseModel("C", "Z");

attributeFactory = new AttributeFactory(typeMetadata);
}
Changed file 9 of 10

@@ -132,6 +132,8 @@
<property name="queryPlanner" ref="DefaultQueryPlanner" />
<!--<property name="queryMacroFunction" ref="queryMacroFunction" />-->
<property name="markingFunctions" ref="markingFunctions" />
<property name="reduceTypeMetadata" value="true" />
<property name="reduceTypeMetadataPerShard" value="false" />
</bean>

<util:list id="IvaratorCacheDirConfigs">
Changed file 10 of 10

@@ -255,7 +255,7 @@
<property name="reduceIngestTypesPerShard" value="true"/>
<property name="reduceQueryFields" value="false" />
<property name="reduceQueryFieldsPerShard" value="false" />
<property name="reduceTypeMetadata" value="false" />
<property name="reduceTypeMetadata" value="true" />
<property name="reduceTypeMetadataPerShard" value="false" />
<!-- should the query prune fields by ingest type -->
<property name="pruneQueryByIngestTypes" value="false" />
