Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CAPI-654 Add new concat/concat_lc actions to UpsertCondition used by … #64

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.regex.Pattern;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.search.BooleanClause;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
Expand All @@ -44,7 +45,9 @@
class UpsertCondition {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

private static final Pattern ACTION_PATTERN = Pattern.compile("^(skip|insert)|(upsert|retain):(\\*|[\\w,]+)|(nullify):([\\w,]+)$");
private static final Pattern ACTION_PATTERN = Pattern.compile(
"^(skip|insert)|(upsert|retain):(\\*|[\\w,]+)|(nullify):([\\w,]+)|(concat|concat_lc):([\\w]+):([\\w,|?]+)$"
);
private static final List<String> ALL_FIELDS = Collections.singletonList("*");

private final String name;
Expand Down Expand Up @@ -164,6 +167,8 @@ enum ActionType {
UPSERT, // copy some/all fields from the OLD doc (when they don't exist on the new doc)
RETAIN, // copy some/all fields from the OLD doc always
NULLIFY, // make sure specific fields are null before doc written
CONCAT, // set a field to be the concatenation of other fields from NEW or OLD doc
CONCAT_LC, // set a field to be the lowercase concatenation of other fields from NEW or OLD doc
INSERT, // just do a regular insert as normal
SKIP; // entirely skip inserting the doc
}
Expand Down Expand Up @@ -289,10 +294,12 @@ private static Predicate<SolrInputDocument> forField(String field, Predicate<Obj
private static class Action {
private final ActionType type;
private final List<String> fields;
private final String target;

Action(ActionType type, List<String> fields) {
Action(ActionType type, List<String> fields, String target) {
this.type = type;
this.fields = fields;
this.target = target;
}

static Action parse(String actionValue) {
Expand All @@ -302,6 +309,7 @@ static Action parse(String actionValue) {
}
ActionType type;
List<String> fields;
String target = null;
if (m.group(1) != null) {
if ("skip".equals(m.group(1))) {
type = ActionType.SKIP;
Expand All @@ -317,12 +325,21 @@ static Action parse(String actionValue) {
}
String fieldsConfig = m.group(3);
fields = Arrays.asList(fieldsConfig.split(","));
} else {
} else if (m.group(4) != null) {
type = ActionType.NULLIFY;
String fieldsConfig = m.group(5);
fields = Arrays.asList(fieldsConfig.split(","));
} else {
if ("concat".equals(m.group(6))) {
type = ActionType.CONCAT;
} else {
type = ActionType.CONCAT_LC;
}
target = m.group(7);
String fieldsConfig = m.group(8);
fields = Arrays.asList(fieldsConfig.split(","));
}
return new Action(type, fields);
return new Action(type, fields, target);
}

void run(SolrInputDocument oldDoc, SolrInputDocument newDoc) {
Expand All @@ -346,7 +363,51 @@ void run(SolrInputDocument oldDoc, SolrInputDocument newDoc) {
fields.forEach(field -> {
newDoc.setField(field, null);
});
} else if (type == ActionType.CONCAT || type == ActionType.CONCAT_LC) {
final StringBuilder builder = new StringBuilder();
for (String field : fields) {
final String fieldValue = getFieldValue(field, oldDoc, newDoc);
if (fieldValue == null) {
// One of the required fields is not present, so we can't set the target field
return;
}
builder.append(type == ActionType.CONCAT_LC ? fieldValue.toLowerCase() : fieldValue);
}
newDoc.setField(target, builder.toString());
}
}

/**
 * Resolves the value to use for one entry of a concat action's field list.
 *
 * <p>An entry is one or more field names separated by {@code |}, optionally followed by a
 * trailing {@code ?}. Names are tried left to right; for each name the new document is
 * consulted before the old document, and the first non-null value wins.
 *
 * @param field  the entry as written in the action configuration
 * @param oldDoc the previously stored document; may be {@code null}
 * @param newDoc the incoming document; may be {@code null}
 * @return the first value found; otherwise the empty string when the entry ends with
 *         {@code ?}, or {@code null} when it does not
 */
private static String getFieldValue(String field, SolrInputDocument oldDoc, SolrInputDocument newDoc) {
    final String[] candidateNames = StringUtils.removeEnd(field, "?").split("\\|");
    // Lookup order per candidate name: NEW doc first, then OLD doc.
    final SolrInputDocument[] lookupOrder = {newDoc, oldDoc};

    for (String candidateName : candidateNames) {
        for (SolrInputDocument doc : lookupOrder) {
            final String found = getFieldFromDoc(candidateName, doc);
            if (found != null) {
                return found;
            }
        }
    }

    // Nothing found: an optional entry contributes nothing, a required one signals absence.
    if (field.endsWith("?")) {
        return "";
    }
    return null;
}

/**
 * Reads {@code fieldName} from {@code doc} as a single String.
 *
 * <p>Two value shapes are understood: a plain {@code String}, and a {@code Map} whose
 * {@code "set"} entry is a {@code String}. Any other shape yields {@code null}.
 *
 * @param fieldName the name of the field to read
 * @param doc       the document to read from; may be {@code null}
 * @return the String value, or {@code null} when {@code doc} is {@code null}, the field is
 *         absent, or the value is not one of the supported shapes
 */
private static String getFieldFromDoc(String fieldName, SolrInputDocument doc) {
    if (doc == null) {
        return null;
    }
    final Object fieldValue = doc.getFieldValue(fieldName);
    if (fieldValue instanceof String) {
        return (String) fieldValue;
    }
    if (fieldValue instanceof Map) {
        // Wildcard-typed cast instead of a raw Map: only get() is needed, so no
        // unchecked/rawtypes warning and no assumption about key or value types.
        final Object setValue = ((Map<?, ?>) fieldValue).get("set");
        if (setValue instanceof String) {
            return (String) setValue;
        }
    }
    // Cannot support non-String types or collection (multi-valued field) types
    return null;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,6 @@ public void givenUpsertAndNoOldDoc_whenRunning() {
assertThat(newDoc.getFieldValue("field"), is("left-alone"));
}


@Test
public void givenNullify_whenRunning() {
NamedList<String> args = namedList(ImmutableListMultimap.of(
Expand All @@ -658,6 +657,263 @@ public void givenNullify_whenRunning() {
assertThat(newDoc.getFieldValue("left-alone"), is("not-null"));
}

// Checks the plain "concat" action: the target field receives the source field values joined
// in the configured order, and the source fields themselves are left untouched.
@Test
public void givenConcat_whenRunning() {
// Target is derived_field; sources are field and other_field (both required, no "?" suffix).
NamedList<String> args = namedList(ImmutableListMultimap.of(
"must_not", "NEW.derived_field:*",
"action", "concat:derived_field:field,other_field"
));

UpsertCondition condition = UpsertCondition.parse("concat", args);

assertThat(condition.getName(), is("concat"));

// Case 1: no old doc, both sources present on the new doc -> derived_field is "Red" + "Blue".
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("field", "Red");
newDoc.setField("other_field", "Blue");

assertTrue(condition.matches(null, newDoc));
assertThat(condition.run(null, newDoc), is(UpsertCondition.ActionType.CONCAT));

assertThat(newDoc.getFieldValue("derived_field"), is("RedBlue"));
assertThat(newDoc.getFieldValue("field"), is("Red"));
assertThat(newDoc.getFieldValue("other_field"), is("Blue"));
}

// Case 2: the new doc already carries derived_field, so the must_not clause stops the match.
{
SolrInputDocument newDoc2 = new SolrInputDocument();
newDoc2.setField("derived_field", "AlreadySet");
assertFalse(condition.matches(null, newDoc2));
}
}

// Checks the "concat_lc" action (lower-cased concatenation) together with an optional
// source field, which is marked by a trailing "?" in the action configuration.
@Test
public void givenConcatLowercase_whenRunning() {
// field and other_field are required; maybe_third is optional.
NamedList<String> args = namedList(ImmutableListMultimap.of(
"must_not", "NEW.derived_field:*",
"action", "concat_lc:derived_field:field,other_field,maybe_third?"
));

UpsertCondition condition = UpsertCondition.parse("concat", args);

// Case 1: all three sources present -> joined and lower-cased; sources keep their original case.
{
SolrInputDocument docWithAllFields = new SolrInputDocument();
docWithAllFields.setField("field", "Red");
docWithAllFields.setField("other_field", "Blue");
docWithAllFields.setField("maybe_third", "Green");

assertTrue(condition.matches(null, docWithAllFields));
assertThat(condition.run(null, docWithAllFields), is(UpsertCondition.ActionType.CONCAT_LC));

assertThat(docWithAllFields.getFieldValue("derived_field"), is("redbluegreen"));
assertThat(docWithAllFields.getFieldValue("field"), is("Red"));
assertThat(docWithAllFields.getFieldValue("other_field"), is("Blue"));
assertThat(docWithAllFields.getFieldValue("maybe_third"), is("Green"));
}

// Case 2: derived_field already set on the new doc -> condition does not match.
{
SolrInputDocument docAlreadySetsDerived = new SolrInputDocument();
docAlreadySetsDerived.setField("derived_field", "AlreadySet");
assertFalse(condition.matches(null, docAlreadySetsDerived));
}

// Case 3: optional source missing -> it contributes nothing and the rest is still concatenated.
{
SolrInputDocument docWithoutOptional = new SolrInputDocument();
docWithoutOptional.setField("field", "Yellow");
docWithoutOptional.setField("other_field", "Orange");

assertTrue(condition.matches(null, docWithoutOptional));
assertThat(condition.run(null, docWithoutOptional), is(UpsertCondition.ActionType.CONCAT_LC));
assertThat(docWithoutOptional.getFieldValue("derived_field"), is("yelloworange"));
}

// Case 4: required source (other_field) missing -> the condition still matches and run()
// still reports CONCAT_LC, but derived_field is left unset.
{
SolrInputDocument docWithoutRequired = new SolrInputDocument();
docWithoutRequired.setField("field", "Yellow");

assertTrue(condition.matches(null, docWithoutRequired));
assertThat(condition.run(null, docWithoutRequired), is(UpsertCondition.ActionType.CONCAT_LC));
assertThat(docWithoutRequired.getFieldValue("derived_field"), nullValue());
}
}

@Test
public void givenConcatWithFallbacks_whenRunning() {
NamedList<String> args = namedList(ImmutableListMultimap.of(
"must_not", "NEW.derived_field:*",
"action", "concat:derived_field:maybe_prefix?,main_field|fallback_field|last_resort,maybe_suffix?"
));

UpsertCondition condition = UpsertCondition.parse("concat", args);

{
SolrInputDocument mainPresent = new SolrInputDocument();
mainPresent.setField("main_field", "Red");
mainPresent.setField("last_resort", "Blue");
assertTrue(condition.matches(null, mainPresent));
assertThat(condition.run(null, mainPresent), is(UpsertCondition.ActionType.CONCAT));
assertThat(mainPresent.getFieldValue("derived_field"), is("Red"));
}

{
SolrInputDocument mainAbsent = new SolrInputDocument();
mainAbsent.setField("fallback_field", "Green");
mainAbsent.setField("last_resort", "Blue");
assertTrue(condition.matches(null, mainAbsent));
assertThat(condition.run(null, mainAbsent), is(UpsertCondition.ActionType.CONCAT));
assertThat(mainAbsent.getFieldValue("derived_field"), is("Green"));
}

{
SolrInputDocument onlyLast = new SolrInputDocument();
onlyLast.setField("last_resort", "Blue");
assertTrue(condition.matches(null, onlyLast));
assertThat(condition.run(null, onlyLast), is(UpsertCondition.ActionType.CONCAT));
assertThat(onlyLast.getFieldValue("derived_field"), is("Blue"));
}

{
SolrInputDocument preAndPost = new SolrInputDocument();
preAndPost.setField("fallback_field", "Green");
preAndPost.setField("maybe_prefix", "pre");
preAndPost.setField("maybe_suffix", "post");
assertTrue(condition.matches(null, preAndPost));
assertThat(condition.run(null, preAndPost), is(UpsertCondition.ActionType.CONCAT));
assertThat(preAndPost.getFieldValue("derived_field"), is("preGreenpost"));
}

{
SolrInputDocument onlyPreAndPost = new SolrInputDocument();
onlyPreAndPost.setField("maybe_prefix", "pre");
onlyPreAndPost.setField("maybe_suffix", "post");
assertTrue(condition.matches(null, onlyPreAndPost));
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit subtle/weird that this will count as matching, but won't have actually set derived_field. I guess it might just need calling out a bit more clearly in a comment or something?

I wonder if we need a test to confirm that having multiple rules where the first rule does this behaves as we expect? e.g. only have optional fields for the first rule, but then confirm it does/doesn't interfere with later rules?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, matching doesn't necessarily imply it will take action, because the condition check is quite a complex boolean: ((old.required OR new.required) AND (old.another OR new.another)). It's more of a pre-check for when it could conceivably be relevant.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect the conditional clauses for using this action to be `should` for all the contributing fields, and `must_not` for the derived field itself.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add some tests for when there are multiple UpsertCondition instances in the overall configuration, to check that subsequent rules do/don't match depending on the outcome of an earlier concat action.

But the other issue, making the conditions accurately match only when the concat action will actually be able to set the derived field, I don't think we can achieve that with the current condition matching logic: it's not expressive enough to handle nested AND/OR conditions that we'd need. Effectively we'd need the condition matching to do the same old/new fallback logic that the action currently implements.

assertThat(condition.run(null, onlyPreAndPost), is(UpsertCondition.ActionType.CONCAT));
assertThat(onlyPreAndPost.getFieldValue("derived_field"), nullValue());
}
}

// Checks how "concat" resolves source fields when both an OLD and a NEW document exist,
// including "|" fallback names and an optional trailing field.
@Test
public void givenConcatWithOldDoc_whenRunning() {
// sku = (model_name, falling back to product_range) + colour + optional size.
NamedList<String> args = namedList(ImmutableListMultimap.of(
"must_not", "NEW.sku:*",
"action", "concat:sku:model_name|product_range,colour,size?"
));

UpsertCondition condition = UpsertCondition.parse("concat", args);

// Case 1: model_name is on both docs -> the NEW value wins; colour only exists on OLD.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("model_name", "Macbook");
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("model_name", "Powerbook");
oldDoc.setField("colour", "Silver");
oldDoc.setField("sku", "PowerbookSilver");
assertTrue(condition.matches(oldDoc, newDoc));
assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT));
assertThat(newDoc.getFieldValue("sku"), is("MacbookSilver"));
}

// Case 2: the NEW doc supplies its own sku -> must_not clause prevents the match.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("model_name", "Macbook");
newDoc.setField("sku", "CustomOverride");
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("model_name", "Powerbook");
oldDoc.setField("colour", "Black");
oldDoc.setField("sku", "PowerbookBlack");
assertFalse(condition.matches(oldDoc, newDoc));
}

// Case 3: model_name only on OLD, product_range only on NEW; colour on both; size only on OLD.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("colour", "Grey");
newDoc.setField("product_range", "Laptop");
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("model_name", "Powerbook");
oldDoc.setField("colour", "Silver");
oldDoc.setField("size", "13in");
oldDoc.setField("sku", "PowerbookSilver13in");
assertTrue(condition.matches(oldDoc, newDoc));
assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT));
assertThat(newDoc.getFieldValue("sku"), is("PowerbookGrey13in"));
// prefers old.model_name to new.product_range fallback
}

// Case 4: model_name absent from both docs; size present on both -> NEW size is used.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("product_range", "Laptop");
newDoc.setField("size", "16in");
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("colour", "Silver");
oldDoc.setField("size", "17in");
oldDoc.setField("sku", "PowerbookSilver17in");
assertTrue(condition.matches(oldDoc, newDoc));
assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT));
assertThat(newDoc.getFieldValue("sku"), is("LaptopSilver16in"));
// fallback to new.product_range since model_name unavailable in old and new
}

// Case 5: neither alternative of the first (required) entry exists -> sku is not set on NEW.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("size", "16in");
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("colour", "Silver");
oldDoc.setField("size", "17in");
oldDoc.setField("sku", "PowerbookSilver17in");
assertTrue(condition.matches(oldDoc, newDoc));
assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT));
assertThat(newDoc.getFieldValue("sku"), nullValue());
// both product_range and model_name unavailable in old and new
}
}

// Checks that "concat" can read source values supplied on the NEW doc as a
// single-entry map keyed by "set" rather than as a plain String.
@Test
public void givenConcatWithAtomicUpdates_whenRunning() {
// Same rule as the old/new-doc test: sku = (model_name | product_range) + colour + optional size.
NamedList<String> args = namedList(ImmutableListMultimap.of(
"must_not", "NEW.sku:*",
"action", "concat:sku:model_name|product_range,colour,size?"
));

UpsertCondition condition = UpsertCondition.parse("concat", args);

// Case 1: NEW model_name given as {"set": "Macbook"} -> its inner value is used; colour comes from OLD.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("model_name", Collections.singletonMap("set", "Macbook"));
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("model_name", "Powerbook");
oldDoc.setField("colour", "Silver");
oldDoc.setField("sku", "PowerbookSilver");
assertTrue(condition.matches(oldDoc, newDoc));
assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT));
assertThat(newDoc.getFieldValue("sku"), is("MacbookSilver"));
}

// Case 2: NEW only sets the optional size (plus an unrelated field) -> required parts come from OLD.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("unrelated_field", Collections.singletonMap("set", "English"));
newDoc.setField("size", Collections.singletonMap("set", "12in"));
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("model_name", "Powerbook");
oldDoc.setField("colour", "Silver");
oldDoc.setField("sku", "PowerbookSilver");
assertTrue(condition.matches(oldDoc, newDoc));
assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT));
assertThat(newDoc.getFieldValue("sku"), is("PowerbookSilver12in"));
}

// Case 3: required colour is missing from both docs -> sku is not set on NEW.
{
SolrInputDocument newDoc = new SolrInputDocument();
newDoc.setField("size", Collections.singletonMap("set", "12in"));
SolrInputDocument oldDoc = new SolrInputDocument();
oldDoc.setField("model_name", "Powerbook");
oldDoc.setField("sku", "PowerbookSilver");
assertTrue(condition.matches(oldDoc, newDoc));
assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT));
assertThat(newDoc.getFieldValue("sku"), nullValue());
}
}

@Test
public void givenExistingPermanentDelete_whenCheckingShouldInsertOrUpsert() {
List<UpsertCondition> conditions = givenMultipleConditions();
Expand Down
Loading