-
Notifications
You must be signed in to change notification settings - Fork 237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixed bug where unique results of null values failed to return #2354
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
d5d4962
Fixed bug where unique results of null values failed to return
ivakegg 76889b9
Added tests to ensure FinalDoc and intermediate results are passed
ivakegg 061f7d3
Fix javadoc
ivakegg eb7e734
Merge branch 'integration' into bugfix/uniqueNulls
ivakegg 394d465
Merge branch 'integration' into bugfix/uniqueNulls
ivakegg f2f3549
Merge branch 'integration' into bugfix/uniqueNulls
ivakegg File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
import java.io.Serializable; | ||
import java.net.URI; | ||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
import java.util.Iterator; | ||
|
@@ -39,10 +40,8 @@ | |
import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; | ||
import datawave.query.iterator.profile.FinalDocumentTrackingIterator; | ||
import datawave.query.model.QueryModel; | ||
import datawave.query.tables.ShardQueryLogic; | ||
import datawave.query.util.sortedset.ByteArrayComparator; | ||
import datawave.query.util.sortedset.FileByteDocumentSortedSet; | ||
import datawave.query.util.sortedset.FileKeySortedSet; | ||
import datawave.query.util.sortedset.FileKeyValueSortedSet; | ||
import datawave.query.util.sortedset.FileSortedSet; | ||
import datawave.query.util.sortedset.HdfsBackedSortedSet; | ||
|
@@ -59,7 +58,6 @@ public class UniqueTransform extends DocumentTransform.DefaultDocumentTransform | |
|
||
private BloomFilter<byte[]> bloom; | ||
private UniqueFields uniqueFields = new UniqueFields(); | ||
private Multimap<String,String> modelMapping; | ||
private HdfsBackedSortedSet<Entry<byte[],Document>> set; | ||
private HdfsBackedSortedSet<Entry<Key,Document>> returnSet; | ||
private Iterator<Entry<Key,Document>> setIterator; | ||
|
@@ -89,40 +87,13 @@ public UniqueTransform(UniqueFields uniqueFields, long queryExecutionForPageTime | |
} | ||
} | ||
|
||
/* | ||
* Create a new {@link UniqueTransform} that will use a bloom filter to return only those results that are unique per the uniqueFields. Special uniqueness can | ||
* be requested for date/time fields (@see UniqueFields). The logic will be used to get a query model to include the reverse mappings in the unique field | ||
* set | ||
* | ||
* @param logic The query logic from which to pull the query model | ||
* | ||
* @param uniqueFields The unique fields | ||
* | ||
* @param queryExecutionForPageTimeout If this timeout has passed since the last result was returned, then an "intermediate" result is returned | ||
* denoting we are still looking for the next unique result. | ||
*/ | ||
public UniqueTransform(ShardQueryLogic logic, UniqueFields uniqueFields, long queryExecutionForPageTimeout) { | ||
this(uniqueFields, queryExecutionForPageTimeout); | ||
QueryModel model = logic.getQueryModel(); | ||
if (model != null) { | ||
modelMapping = HashMultimap.create(); | ||
// reverse the reverse query mapping which will give us a mapping from the final field name to the original field name(s) | ||
for (Map.Entry<String,String> entry : model.getReverseQueryMapping().entrySet()) { | ||
modelMapping.put(entry.getValue(), entry.getKey()); | ||
} | ||
} | ||
setModelMappings(model); | ||
} | ||
|
||
/** | ||
* Update the configuration of this transform. If the configuration is actually changing, then the bloom filter will be reset as well. | ||
* | ||
* @param uniqueFields | ||
* The new set of unique fields. | ||
* @param model | ||
* The query model | ||
*/ | ||
public void updateConfig(UniqueFields uniqueFields, QueryModel model) { | ||
public void updateConfig(UniqueFields uniqueFields) { | ||
// only reset the bloom filter if changing the field set | ||
if (!this.uniqueFields.equals(uniqueFields)) { | ||
this.uniqueFields = uniqueFields.clone(); | ||
|
@@ -132,23 +103,6 @@ public void updateConfig(UniqueFields uniqueFields, QueryModel model) { | |
log.trace("unique fields: " + this.uniqueFields.getFields()); | ||
} | ||
} | ||
setModelMappings(model); | ||
} | ||
|
||
/** | ||
* Set the query model from which the reverse query mappings are pulled. | ||
* | ||
* @param model | ||
* The query model | ||
*/ | ||
private void setModelMappings(QueryModel model) { | ||
if (model != null) { | ||
modelMapping = HashMultimap.create(); | ||
// reverse the reverse query mapping which will give us a mapping from the final field name to the original field name(s) | ||
for (Map.Entry<String,String> entry : model.getReverseQueryMapping().entrySet()) { | ||
modelMapping.put(entry.getValue(), entry.getKey()); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -177,6 +131,10 @@ public Entry<Key,Document> apply(@Nullable Entry<Key,Document> keyDocumentEntry) | |
return keyDocumentEntry; | ||
} | ||
|
||
if (keyDocumentEntry.getValue().isIntermediateResult()) { | ||
return keyDocumentEntry; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was the critical piece to fix the null document issue. |
||
try { | ||
if (set != null) { | ||
byte[] signature = getBytes(keyDocumentEntry.getValue()); | ||
|
@@ -280,50 +238,48 @@ byte[] getBytes(Document document) throws IOException { | |
* if we failed to generate the byte array | ||
*/ | ||
private void outputSortedFieldValues(Document document, DataOutputStream output) throws IOException { | ||
int count = 0; | ||
String lastField = ""; | ||
List<String> values = new ArrayList<>(); | ||
Multimap<String,String> values = HashMultimap.create(); | ||
for (String documentField : new TreeSet<>(document.getDictionary().keySet())) { | ||
String field = getUniqueField(documentField); | ||
if (field != null) { | ||
if (!field.equals(lastField)) { | ||
count = dumpValues(count, lastField, values, output); | ||
lastField = field; | ||
} | ||
addValues(field, document.get(documentField), values); | ||
} | ||
} | ||
dumpValues(count, lastField, values, output); | ||
// Always dump the fields in the same order (uniqueFields.getFields is a sorted collection) | ||
for (String field : uniqueFields.getFields()) { | ||
dumpValues(field, values.get(field), output); | ||
} | ||
output.flush(); | ||
} | ||
|
||
/** | ||
* Dump a list of values, sorted, to the data output stream | ||
* | ||
* @param count | ||
* value count | ||
* @param field | ||
* a field | ||
* @param values | ||
* the list of values | ||
* @param output | ||
* the output stream | ||
* @return The next field count | ||
* @throws IOException | ||
* for issues with read/write | ||
*/ | ||
private int dumpValues(int count, String field, List<String> values, DataOutputStream output) throws IOException { | ||
private void dumpValues(String field, Collection<String> values, DataOutputStream output) throws IOException { | ||
String separator = "f-" + field + ":"; | ||
if (!values.isEmpty()) { | ||
Collections.sort(values); | ||
String separator = "f-" + field + '/' + (count++) + ":"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding the count in here is not really useful. |
||
for (String value : values) { | ||
List<String> valueList = new ArrayList<>(values); | ||
// always output values in sorted order. | ||
Collections.sort(valueList); | ||
for (String value : valueList) { | ||
output.writeUTF(separator); | ||
output.writeUTF(value); | ||
separator = ","; | ||
} | ||
values.clear(); | ||
} else { | ||
// dump at least a header for empty value sets to ensure we have some bytes to check against | ||
// in the bloom filter. | ||
output.writeUTF(separator); | ||
} | ||
return count; | ||
} | ||
|
||
/** | ||
|
@@ -334,16 +290,16 @@ private int dumpValues(int count, String field, List<String> values, DataOutputS | |
* @param attribute | ||
* The attribute | ||
* @param values | ||
* The list of values to be updated | ||
* The map of values to be updated | ||
*/ | ||
private void addValues(final String field, Attribute<?> attribute, List<String> values) { | ||
private void addValues(final String field, Attribute<?> attribute, Multimap<String,String> values) { | ||
if (attribute instanceof Attributes) { | ||
// @formatter:off | ||
((Attributes) attribute).getAttributes().stream() | ||
.forEach(a -> addValues(field, a, values)); | ||
// @formatter:on | ||
} else { | ||
values.add(uniqueFields.transformValue(field, String.valueOf(attribute.getData()))); | ||
values.put(field, uniqueFields.transformValue(field, String.valueOf(attribute.getData()))); | ||
} | ||
} | ||
|
||
|
@@ -376,8 +332,7 @@ private String getFieldWithoutGrouping(String field) { | |
} | ||
|
||
/** | ||
* Return whether or not the provided document field is considered a case-insensitive match for the provided field, applying reverse model mappings if | ||
* configured. | ||
* Return whether or not the provided document field is considered a case-insensitive match for the provided field | ||
* | ||
* @param baseField | ||
* The base field | ||
|
@@ -386,9 +341,7 @@ private String getFieldWithoutGrouping(String field) { | |
* @return true if matching | ||
*/ | ||
private boolean isMatchingField(String baseField, String field) { | ||
baseField = baseField.toUpperCase(); | ||
field = field.toUpperCase(); | ||
return field.equals(baseField) || (modelMapping != null && modelMapping.get(field).contains(baseField)); | ||
return baseField.equalsIgnoreCase(field); | ||
} | ||
|
||
/** | ||
|
@@ -573,10 +526,6 @@ public Builder withQueryExecutionForPageTimeout(long timeout) { | |
public UniqueTransform build() throws IOException { | ||
UniqueTransform transform = new UniqueTransform(uniqueFields, queryExecutionForPageTimeout); | ||
|
||
if (model != null) { | ||
transform.setModelMappings(model); | ||
} | ||
|
||
if (transform.uniqueFields.isMostRecent()) { | ||
// @formatter:off | ||
// noinspection unchecked | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This whole mapping is already done in the DefaultQueryPlanner when setting up the query fields. This was redundant.