Skip to content

Commit

Permalink
CAPI-654 Add new concat/concat_lc actions to UpsertCondition used by …
Browse files Browse the repository at this point in the history
…ConditionalUpsertProcessorFactory

This allows us to support updating a derived field when we do an atomic update of one of the contributing fields
  • Loading branch information
timatbw committed Mar 11, 2024
1 parent 1c0dfa0 commit fc391eb
Show file tree
Hide file tree
Showing 2 changed files with 322 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.regex.Pattern;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.search.BooleanClause;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
Expand All @@ -44,7 +45,9 @@
class UpsertCondition {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

private static final Pattern ACTION_PATTERN = Pattern.compile("^(skip|insert)|(upsert|retain):(\\*|[\\w,]+)|(nullify):([\\w,]+)$");
// Grammar of an "action" value. The whole alternation sits inside a non-capturing group so that
// ^ and $ anchor every alternative rather than only the first and the last one; because the
// wrapper is non-capturing, the capturing-group numbers are unchanged:
//   1        skip | insert
//   2, 3     upsert | retain, then "*" or a comma-separated field list
//   4, 5     nullify, then a comma-separated field list
//   6, 7, 8  concat | concat_lc, then the target field, then the comma-separated source specs
//            (a source spec may contain "|" between names and a trailing "?")
private static final Pattern ACTION_PATTERN = Pattern.compile(
    "^(?:(skip|insert)|(upsert|retain):(\\*|[\\w,]+)|(nullify):([\\w,]+)|(concat|concat_lc):([\\w]+):([\\w,|?]+))$"
);
private static final List<String> ALL_FIELDS = Collections.singletonList("*");

private final String name;
Expand Down Expand Up @@ -164,6 +167,8 @@ enum ActionType {
UPSERT, // copy some/all fields from the OLD doc (when they don't exist on the new doc)
RETAIN, // copy some/all fields from the OLD doc always
NULLIFY, // make sure specific fields are null before doc written
CONCAT, // set a field to be the concatenation of other fields from NEW or OLD doc
CONCAT_LC, // set a field to be the lowercase concatenation of other fields from NEW or OLD doc
INSERT, // just do a regular insert as normal
SKIP; // entirely skip inserting the doc
}
Expand Down Expand Up @@ -289,10 +294,12 @@ private static Predicate<SolrInputDocument> forField(String field, Predicate<Obj
private static class Action {
private final ActionType type;
private final List<String> fields;
private final String target;

Action(ActionType type, List<String> fields) {
Action(ActionType type, List<String> fields, String target) {
this.type = type;
this.fields = fields;
this.target = target;
}

static Action parse(String actionValue) {
Expand All @@ -302,6 +309,7 @@ static Action parse(String actionValue) {
}
ActionType type;
List<String> fields;
String target = null;
if (m.group(1) != null) {
if ("skip".equals(m.group(1))) {
type = ActionType.SKIP;
Expand All @@ -317,12 +325,21 @@ static Action parse(String actionValue) {
}
String fieldsConfig = m.group(3);
fields = Arrays.asList(fieldsConfig.split(","));
} else {
} else if (m.group(4) != null) {
type = ActionType.NULLIFY;
String fieldsConfig = m.group(5);
fields = Arrays.asList(fieldsConfig.split(","));
} else {
if ("concat".equals(m.group(6))) {
type = ActionType.CONCAT;
} else {
type = ActionType.CONCAT_LC;
}
target = m.group(7);
String fieldsConfig = m.group(8);
fields = Arrays.asList(fieldsConfig.split(","));
}
return new Action(type, fields);
return new Action(type, fields, target);
}

void run(SolrInputDocument oldDoc, SolrInputDocument newDoc) {
Expand All @@ -346,7 +363,51 @@ void run(SolrInputDocument oldDoc, SolrInputDocument newDoc) {
fields.forEach(field -> {
newDoc.setField(field, null);
});
} else if (type == ActionType.CONCAT || type == ActionType.CONCAT_LC) {
final StringBuilder builder = new StringBuilder();
for (String field : fields) {
final String fieldValue = getFieldValue(field, oldDoc, newDoc);
if (fieldValue == null) {
// One of the required fields is not present, so we can't set the target field
return;
}
builder.append(type == ActionType.CONCAT_LC ? fieldValue.toLowerCase() : fieldValue);
}
newDoc.setField(target, builder.toString());
}
}

/**
 * Resolves one source-field spec of a concat action to a String value.
 *
 * <p>The spec is one or more field names separated by {@code |}, optionally followed by a single
 * trailing {@code ?}. Names are tried left to right; for each name the new doc is consulted
 * first and the old doc second, and the first non-null value is returned.
 *
 * @param field the field spec, e.g. {@code main_field|fallback_field} or {@code maybe_suffix?}
 * @param oldDoc the old document, consulted after {@code newDoc} for each name
 * @param newDoc the new document, consulted first for each name
 * @return the first value found; if none is found, the empty string when the spec ends in
 *         {@code ?}, otherwise {@code null}
 */
private static String getFieldValue(String field, SolrInputDocument oldDoc, SolrInputDocument newDoc) {
  final String[] candidates = StringUtils.removeEnd(field, "?").split("\\|");
  for (String candidate : candidates) {
    final String fromNew = getFieldFromDoc(candidate, newDoc);
    final String resolved = fromNew != null ? fromNew : getFieldFromDoc(candidate, oldDoc);
    if (resolved != null) {
      return resolved;
    }
  }
  // Nothing usable in either doc: an optional spec contributes an empty string,
  // a required one reports "missing" via null.
  if (field.endsWith("?")) {
    return "";
  }
  return null;
}

/**
 * Reads a single String value for {@code fieldName} from {@code doc}.
 *
 * <p>Two shapes of field value are understood: a plain {@link String}, and a {@link Map} whose
 * {@code "set"} entry is a String. Any other value yields {@code null}.
 *
 * @param fieldName name of the field to read
 * @param doc the document to read from; may be {@code null}
 * @return the String value, or {@code null} when {@code doc} is {@code null}, the field is
 *         absent, or the value is not one of the supported shapes
 */
private static String getFieldFromDoc(String fieldName, SolrInputDocument doc) {
  if (doc == null) {
    return null;
  }
  final Object fieldValue = doc.getFieldValue(fieldName);
  if (fieldValue instanceof String) {
    return (String) fieldValue;
  }
  if (fieldValue instanceof Map) {
    // Wildcard-typed cast instead of a raw Map: only a lookup by key is needed here.
    final Object setValue = ((Map<?, ?>) fieldValue).get("set");
    if (setValue instanceof String) {
      return (String) setValue;
    }
  }
  // Cannot support non-String types or collection (multi-valued field) types
  return null;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,6 @@ public void givenUpsertAndNoOldDoc_whenRunning() {
assertThat(newDoc.getFieldValue("field"), is("left-alone"));
}


@Test
public void givenNullify_whenRunning() {
NamedList<String> args = namedList(ImmutableListMultimap.of(
Expand All @@ -658,6 +657,263 @@ public void givenNullify_whenRunning() {
assertThat(newDoc.getFieldValue("left-alone"), is("not-null"));
}

@Test
public void givenConcat_whenRunning() {
  // Condition under test: concat "field" and "other_field" into "derived_field",
  // guarded by a must_not clause on NEW.derived_field.
  NamedList<String> config = namedList(ImmutableListMultimap.of(
      "must_not", "NEW.derived_field:*",
      "action", "concat:derived_field:field,other_field"
  ));
  UpsertCondition concatRule = UpsertCondition.parse("concat", config);
  assertThat(concatRule.getName(), is("concat"));

  // Both source fields are on the new doc: derived_field is their concatenation
  // and the sources are left as they were.
  SolrInputDocument incoming = new SolrInputDocument();
  incoming.setField("field", "Red");
  incoming.setField("other_field", "Blue");

  assertTrue(concatRule.matches(null, incoming));
  UpsertCondition.ActionType outcome = concatRule.run(null, incoming);
  assertThat(outcome, is(UpsertCondition.ActionType.CONCAT));

  assertThat(incoming.getFieldValue("derived_field"), is("RedBlue"));
  assertThat(incoming.getFieldValue("field"), is("Red"));
  assertThat(incoming.getFieldValue("other_field"), is("Blue"));

  // The new doc already carries derived_field: the condition does not match.
  SolrInputDocument alreadyDerived = new SolrInputDocument();
  alreadyDerived.setField("derived_field", "AlreadySet");
  assertFalse(concatRule.matches(null, alreadyDerived));
}

@Test
public void givenConcatLowercase_whenRunning() {
  // Condition under test: lower-cased concat of two required fields plus one optional ("?") field.
  NamedList<String> config = namedList(ImmutableListMultimap.of(
      "must_not", "NEW.derived_field:*",
      "action", "concat_lc:derived_field:field,other_field,maybe_third?"
  ));
  UpsertCondition lowercaseRule = UpsertCondition.parse("concat", config);
  final UpsertCondition.ActionType expectedType = UpsertCondition.ActionType.CONCAT_LC;

  // All three fields present: every part is lower-cased in the result, sources keep their case.
  SolrInputDocument complete = new SolrInputDocument();
  complete.setField("field", "Red");
  complete.setField("other_field", "Blue");
  complete.setField("maybe_third", "Green");

  assertTrue(lowercaseRule.matches(null, complete));
  assertThat(lowercaseRule.run(null, complete), is(expectedType));

  assertThat(complete.getFieldValue("derived_field"), is("redbluegreen"));
  assertThat(complete.getFieldValue("field"), is("Red"));
  assertThat(complete.getFieldValue("other_field"), is("Blue"));
  assertThat(complete.getFieldValue("maybe_third"), is("Green"));

  // derived_field already supplied on the new doc: the condition does not match.
  SolrInputDocument presetDerived = new SolrInputDocument();
  presetDerived.setField("derived_field", "AlreadySet");
  assertFalse(lowercaseRule.matches(null, presetDerived));

  // Optional field absent: the result is built from the two required fields only.
  SolrInputDocument missingOptional = new SolrInputDocument();
  missingOptional.setField("field", "Yellow");
  missingOptional.setField("other_field", "Orange");

  assertTrue(lowercaseRule.matches(null, missingOptional));
  assertThat(lowercaseRule.run(null, missingOptional), is(expectedType));
  assertThat(missingOptional.getFieldValue("derived_field"), is("yelloworange"));

  // A required field absent: the action still reports CONCAT_LC but derived_field stays unset.
  SolrInputDocument missingRequired = new SolrInputDocument();
  missingRequired.setField("field", "Yellow");

  assertTrue(lowercaseRule.matches(null, missingRequired));
  assertThat(lowercaseRule.run(null, missingRequired), is(expectedType));
  assertThat(missingRequired.getFieldValue("derived_field"), nullValue());
}

@Test
public void givenConcatWithFallbacks_whenRunning() {
  // Condition under test: optional prefix, a required middle part with "|" fallbacks,
  // and an optional suffix.
  NamedList<String> config = namedList(ImmutableListMultimap.of(
      "must_not", "NEW.derived_field:*",
      "action", "concat:derived_field:maybe_prefix?,main_field|fallback_field|last_resort,maybe_suffix?"
  ));
  UpsertCondition fallbackRule = UpsertCondition.parse("concat", config);
  final UpsertCondition.ActionType expectedType = UpsertCondition.ActionType.CONCAT;

  // main_field present: it wins over last_resort.
  SolrInputDocument withMain = new SolrInputDocument();
  withMain.setField("main_field", "Red");
  withMain.setField("last_resort", "Blue");
  assertTrue(fallbackRule.matches(null, withMain));
  assertThat(fallbackRule.run(null, withMain), is(expectedType));
  assertThat(withMain.getFieldValue("derived_field"), is("Red"));

  // main_field absent: fallback_field wins over last_resort.
  SolrInputDocument withFallback = new SolrInputDocument();
  withFallback.setField("fallback_field", "Green");
  withFallback.setField("last_resort", "Blue");
  assertTrue(fallbackRule.matches(null, withFallback));
  assertThat(fallbackRule.run(null, withFallback), is(expectedType));
  assertThat(withFallback.getFieldValue("derived_field"), is("Green"));

  // Only last_resort present: it is used.
  SolrInputDocument withLastResort = new SolrInputDocument();
  withLastResort.setField("last_resort", "Blue");
  assertTrue(fallbackRule.matches(null, withLastResort));
  assertThat(fallbackRule.run(null, withLastResort), is(expectedType));
  assertThat(withLastResort.getFieldValue("derived_field"), is("Blue"));

  // Prefix and suffix present alongside a fallback: all three parts are joined in spec order.
  SolrInputDocument wrapped = new SolrInputDocument();
  wrapped.setField("fallback_field", "Green");
  wrapped.setField("maybe_prefix", "pre");
  wrapped.setField("maybe_suffix", "post");
  assertTrue(fallbackRule.matches(null, wrapped));
  assertThat(fallbackRule.run(null, wrapped), is(expectedType));
  assertThat(wrapped.getFieldValue("derived_field"), is("preGreenpost"));

  // Only the optional parts present: the required middle part is missing, so nothing is set.
  SolrInputDocument optionalsOnly = new SolrInputDocument();
  optionalsOnly.setField("maybe_prefix", "pre");
  optionalsOnly.setField("maybe_suffix", "post");
  assertTrue(fallbackRule.matches(null, optionalsOnly));
  assertThat(fallbackRule.run(null, optionalsOnly), is(expectedType));
  assertThat(optionalsOnly.getFieldValue("derived_field"), nullValue());
}

@Test
public void givenConcatWithOldDoc_whenRunning() {
  // Condition under test: sku = (model_name or product_range) + colour + optional size,
  // with values drawn from both the old and the new doc.
  NamedList<String> config = namedList(ImmutableListMultimap.of(
      "must_not", "NEW.sku:*",
      "action", "concat:sku:model_name|product_range,colour,size?"
  ));
  UpsertCondition skuRule = UpsertCondition.parse("concat", config);
  final UpsertCondition.ActionType expectedType = UpsertCondition.ActionType.CONCAT;

  {
    // New doc overrides model_name; colour comes from the old doc.
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("model_name", "Powerbook");
    stored.setField("colour", "Silver");
    stored.setField("sku", "PowerbookSilver");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("model_name", "Macbook");

    assertTrue(skuRule.matches(stored, incoming));
    assertThat(skuRule.run(stored, incoming), is(expectedType));
    assertThat(incoming.getFieldValue("sku"), is("MacbookSilver"));
  }

  {
    // New doc supplies its own sku: the condition does not match.
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("model_name", "Powerbook");
    stored.setField("colour", "Black");
    stored.setField("sku", "PowerbookBlack");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("model_name", "Macbook");
    incoming.setField("sku", "CustomOverride");

    assertFalse(skuRule.matches(stored, incoming));
  }

  {
    // prefers old.model_name to new.product_range fallback
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("model_name", "Powerbook");
    stored.setField("colour", "Silver");
    stored.setField("size", "13in");
    stored.setField("sku", "PowerbookSilver13in");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("colour", "Grey");
    incoming.setField("product_range", "Laptop");

    assertTrue(skuRule.matches(stored, incoming));
    assertThat(skuRule.run(stored, incoming), is(expectedType));
    assertThat(incoming.getFieldValue("sku"), is("PowerbookGrey13in"));
  }

  {
    // fallback to new.product_range since model_name unavailable in old and new
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("colour", "Silver");
    stored.setField("size", "17in");
    stored.setField("sku", "PowerbookSilver17in");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("product_range", "Laptop");
    incoming.setField("size", "16in");

    assertTrue(skuRule.matches(stored, incoming));
    assertThat(skuRule.run(stored, incoming), is(expectedType));
    assertThat(incoming.getFieldValue("sku"), is("LaptopSilver16in"));
  }

  {
    // both product_range and model_name unavailable in old and new
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("colour", "Silver");
    stored.setField("size", "17in");
    stored.setField("sku", "PowerbookSilver17in");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("size", "16in");

    assertTrue(skuRule.matches(stored, incoming));
    assertThat(skuRule.run(stored, incoming), is(expectedType));
    assertThat(incoming.getFieldValue("sku"), nullValue());
  }
}

@Test
public void givenConcatWithAtomicUpdates_whenRunning() {
  // Condition under test: same sku rule, but the new doc carries its values as
  // single-entry {"set": value} maps instead of plain strings.
  NamedList<String> config = namedList(ImmutableListMultimap.of(
      "must_not", "NEW.sku:*",
      "action", "concat:sku:model_name|product_range,colour,size?"
  ));
  UpsertCondition skuRule = UpsertCondition.parse("concat", config);
  final UpsertCondition.ActionType expectedType = UpsertCondition.ActionType.CONCAT;

  {
    // "set" on model_name replaces the old model_name in the derived sku.
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("model_name", "Powerbook");
    stored.setField("colour", "Silver");
    stored.setField("sku", "PowerbookSilver");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("model_name", Collections.singletonMap("set", "Macbook"));

    assertTrue(skuRule.matches(stored, incoming));
    assertThat(skuRule.run(stored, incoming), is(expectedType));
    assertThat(incoming.getFieldValue("sku"), is("MacbookSilver"));
  }

  {
    // "set" on the optional size is appended; a "set" on an unrelated field has no effect on sku.
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("model_name", "Powerbook");
    stored.setField("colour", "Silver");
    stored.setField("sku", "PowerbookSilver");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("unrelated_field", Collections.singletonMap("set", "English"));
    incoming.setField("size", Collections.singletonMap("set", "12in"));

    assertTrue(skuRule.matches(stored, incoming));
    assertThat(skuRule.run(stored, incoming), is(expectedType));
    assertThat(incoming.getFieldValue("sku"), is("PowerbookSilver12in"));
  }

  {
    // colour is required but present in neither doc: sku is not set on the new doc.
    SolrInputDocument stored = new SolrInputDocument();
    stored.setField("model_name", "Powerbook");
    stored.setField("sku", "PowerbookSilver");
    SolrInputDocument incoming = new SolrInputDocument();
    incoming.setField("size", Collections.singletonMap("set", "12in"));

    assertTrue(skuRule.matches(stored, incoming));
    assertThat(skuRule.run(stored, incoming), is(expectedType));
    assertThat(incoming.getFieldValue("sku"), nullValue());
  }
}

@Test
public void givenExistingPermanentDelete_whenCheckingShouldInsertOrUpsert() {
List<UpsertCondition> conditions = givenMultipleConditions();
Expand Down

0 comments on commit fc391eb

Please sign in to comment.