Skip to content

Commit

Permalink
Ensure Discovery Queries only return indexed fields (#2557)
Browse files Browse the repository at this point in the history
* Ignore non-indexed fields in discovery query

* Add unit test for this feature
  • Loading branch information
foster33 authored Nov 21, 2024
1 parent d9c48e4 commit 6de9e50
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ public GenericQueryConfiguration initialize(AccumuloClient client, Query setting
// Update the query model.
setQueryModel(metadataHelper.getQueryModel(getModelTableName(), getModelName(), null));

// Set the currently indexed fields
getConfig().setIndexedFields(metadataHelper.getIndexedFields(Collections.emptySet()));

// Set the connector.
getConfig().setClient(client);

Expand Down Expand Up @@ -603,7 +606,7 @@ public void setupQuery(GenericQueryConfiguration genericConfig) throws QueryExce
bs.fetchColumnFamily(new Text(cf));
}

iterators.add(transformScanner(bs, qd));
iterators.add(transformScanner(bs, qd, config.getIndexedFields()));
}
this.iterator = concat(iterators.iterator());
}
Expand All @@ -614,13 +617,16 @@ public ShardIndexQueryTable clone() {
}

/**
* Takes in a batch scanner and returns an iterator over the DiscoveredThing objects contained in the value.
* Takes in a batch scanner, removes all DiscoveredThings that do not have an indexed field, and returns an iterator over the DiscoveredThing objects
* contained in the value.
*
* @param scanner
* a batch scanner
* @param indexedFields
* set of currently indexed fields
* @return an iterator over the DiscoveredThing objects whose field is contained in indexedFields
*/
private Iterator<DiscoveredThing> transformScanner(final BatchScanner scanner, final QueryData queryData) {
private Iterator<DiscoveredThing> transformScanner(final BatchScanner scanner, final QueryData queryData, Set<String> indexedFields) {
return concat(transform(scanner.iterator(), new Function<Entry<Key,Value>,Iterator<DiscoveredThing>>() {
DataInputBuffer in = new DataInputBuffer();

Expand All @@ -638,7 +644,12 @@ public Iterator<DiscoveredThing> apply(Entry<Key,Value> from) {
}
ArrayList<DiscoveredThing> thangs = Lists.newArrayListWithCapacity(aw.get().length);
for (Writable w : aw.get()) {
thangs.add((DiscoveredThing) w);
// Check to see if the field is currently indexed, if it's not, we should NOT be adding it to 'thangs'
if (indexedFields.contains(((DiscoveredThing) w).getField())) {
thangs.add((DiscoveredThing) w);
} else {
log.debug(((DiscoveredThing) w).getField() + " was NOT found in IndexedFields");
}
}
return thangs.iterator();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ private void writeData() throws Throwable {
writeEntries("VEHICLE", "ranger", "stock", "BAR", "20130102", 5, 5, 5);
writeEntries("VEHICLE", "ranger", "stock", "FOO", "20130103", 20, 15, 2);
writeEntries("VEHICLE", "ranger", "stock", "BAR", "20130103", 6, 1, 2);
writeEntries("NON_INDEXED_FIELD", "coffee", "csv", "FOO", "20130101", 1, 1, 1);
writeEntries("NON_INDEXED_FIELD", "espresso", "csv", "FOO", "20130102", 1, 5, 5);

writeForwardModel("ANIMAL", "ROOSTER");
writeForwardModel("ANIMAL", "BIRD");
Expand All @@ -146,7 +148,9 @@ private void writeEntries(String field, String term, String datatype, String vis
try (BatchWriter writer = client.createBatchWriter(QueryTestTableHelper.METADATA_TABLE_NAME, config)) {
Mutation mutation = new Mutation(field);
mutation.put("t", datatype + "\u0000" + LcNoDiacriticsType.class.getName(), columnVisibility, BLANK_VALUE);
mutation.put("i", datatype + "\u0000" + dateStr, columnVisibility, new Value(SummingCombiner.VAR_LEN_ENCODER.encode(1L)));
if (!field.equals("NON_INDEXED_FIELD")) {
mutation.put("i", datatype + "\u0000" + dateStr, columnVisibility, new Value(SummingCombiner.VAR_LEN_ENCODER.encode(1L)));
}
mutation.put("ri", datatype + "\u0000" + dateStr, columnVisibility, new Value(SummingCombiner.VAR_LEN_ENCODER.encode(1L)));
writer.addMutation(mutation);
}
Expand Down Expand Up @@ -385,6 +389,19 @@ public void testFieldLiteralAndPattern() throws Exception {
assertQueryResults();
}

/**
 * Queries for two terms written under NON_INDEXED_FIELD ("coffee", "espresso") together with "rooster", and expects only the "rooster" results to be
 * returned for each day in the queried date range.
 */
@Test
public void testIgnoreNonIndexedField() throws Exception {
    givenQuery("coffee OR espresso OR rooster");
    givenStartDate("20130101");
    givenEndDate("20130104");

    // One expected "rooster" hit per day; nothing is expected for "coffee" or "espresso".
    final String[] expectedDates = {"20130101", "20130102", "20130103"};
    for (String date : expectedDates) {
        expect(new DiscoveredThing("rooster", "FLOCK", "stock", date, "BAR", 30L, new MapWritable()));
    }

    assertQueryResults();
}

@Test
public void testReverse() throws Exception {
givenQuery("*.sky*er");
Expand Down

0 comments on commit 6de9e50

Please sign in to comment.