diff --git a/solr/core/src/java/org/apache/solr/update/processor/ConditionalUpsertProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ConditionalUpsertProcessorFactory.java index 6df13680fa37..2967ac3f4a25 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/ConditionalUpsertProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/ConditionalUpsertProcessorFactory.java @@ -20,7 +20,6 @@ import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.List; -import java.util.Map; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; diff --git a/solr/core/src/java/org/apache/solr/update/processor/UpsertCondition.java b/solr/core/src/java/org/apache/solr/update/processor/UpsertCondition.java index 48191a3440c2..4af24872ff30 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/UpsertCondition.java +++ b/solr/core/src/java/org/apache/solr/update/processor/UpsertCondition.java @@ -31,6 +31,7 @@ import java.util.regex.Pattern; import java.util.stream.Stream; +import org.apache.commons.lang3.StringUtils; import org.apache.lucene.search.BooleanClause; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; @@ -44,7 +45,9 @@ class UpsertCondition { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static final Pattern ACTION_PATTERN = Pattern.compile("^(skip|insert)|(upsert|retain):(\\*|[\\w,]+)|(nullify):([\\w,]+)$"); + private static final Pattern ACTION_PATTERN = Pattern.compile( + "^(skip|insert)|(upsert|retain):(\\*|[\\w,]+)|(nullify):([\\w,]+)|(concat|concat_lc):([\\w]+):([\\w,|?]+)$" + ); private static final List ALL_FIELDS = Collections.singletonList("*"); private final String name; @@ -137,6 +140,7 @@ ActionType run(SolrInputDocument oldDoc, SolrInputDocument newDoc) { boolean matches(SolrInputDocument oldDoc, SolrInputDocument newDoc) { Docs docs = new Docs(oldDoc, newDoc); boolean atLeastOneMatched = false; + boolean hasPositive = false; for (FieldRule rule: rules) { boolean ruleMatched = rule.matches(docs); switch(rule.getOccur()) { @@ -145,25 +149,28 @@ boolean matches(SolrInputDocument oldDoc, SolrInputDocument newDoc) { return false; } atLeastOneMatched = true; + hasPositive = true; break; case MUST_NOT: if (ruleMatched) { return false; } - atLeastOneMatched = true; break; default: atLeastOneMatched = ruleMatched || atLeastOneMatched; + hasPositive = true; break; } } - return atLeastOneMatched; + return atLeastOneMatched || !hasPositive; } enum ActionType { UPSERT, // copy some/all fields from the OLD doc (when they don't exist on the new doc) RETAIN, // copy some/all fields from the OLD doc always NULLIFY, // make sure specific fields are null before doc written + CONCAT, // attempt to set a field to be the concatenation of other fields from NEW or OLD doc + CONCAT_LC, // attempt to set a field to be the lowercase concatenation of other fields from NEW or OLD doc INSERT, // just do a regular insert as normal SKIP; // entirely skip inserting the doc } @@ -289,10 +296,12 @@ private static Predicate forField(String field, Predicate fields; + private final String target; - Action(ActionType type, List fields) { + Action(ActionType type, List fields, String target) { this.type = type; this.fields = fields; + this.target = target; } static Action parse(String actionValue) { @@ -302,6 +311,7 @@ static Action parse(String actionValue) { } ActionType type; List fields; + String target = null; if (m.group(1) != null) { if ("skip".equals(m.group(1))) { type = ActionType.SKIP; @@ -317,12 +327,21 @@ static Action parse(String actionValue) { } String fieldsConfig = m.group(3); fields = Arrays.asList(fieldsConfig.split(",")); - } else { + } else if (m.group(4) != null) { type = ActionType.NULLIFY; String fieldsConfig = m.group(5); fields = Arrays.asList(fieldsConfig.split(",")); + } else { + if ("concat".equals(m.group(6))) { + type = ActionType.CONCAT; + } else { + type = ActionType.CONCAT_LC; + } + target = m.group(7); + String fieldsConfig = m.group(8); + fields = Arrays.asList(fieldsConfig.split(",")); } - return new Action(type, fields); + return new Action(type, fields, target); } void run(SolrInputDocument oldDoc, SolrInputDocument newDoc) { @@ -346,7 +365,51 @@ void run(SolrInputDocument oldDoc, SolrInputDocument newDoc) { fields.forEach(field -> { newDoc.setField(field, null); }); + } else if (type == ActionType.CONCAT || type == ActionType.CONCAT_LC) { + final StringBuilder builder = new StringBuilder(); + for (String field : fields) { + final String fieldValue = getFieldValue(field, oldDoc, newDoc); + if (fieldValue == null) { + // One of the required fields is not present, so we can't set the target field + return; + } + builder.append(type == ActionType.CONCAT_LC ? fieldValue.toLowerCase(Locale.ROOT) : fieldValue); + } + newDoc.setField(target, builder.toString()); + } + } + + private static String getFieldValue(String field, SolrInputDocument oldDoc, SolrInputDocument newDoc) { + boolean optional = field.endsWith("?"); + for (String fieldName : StringUtils.removeEnd(field, "?").split("\\|")) { + String value = getFieldFromDoc(fieldName, newDoc); + if (value != null) { + return value; + } + value = getFieldFromDoc(fieldName, oldDoc); + if (value != null) { + return value; + } + } + return optional ? "" : null; + } + + private static String getFieldFromDoc(String fieldName, SolrInputDocument doc) { + if (doc == null) { + return null; + } + Object fieldValue = doc.getFieldValue(fieldName); + if (fieldValue instanceof String) { + return (String)fieldValue; + } + if (fieldValue instanceof Map) { + final Object setValue = ((Map)fieldValue).get("set"); + if (setValue instanceof String) { + return (String)setValue; + } } + // Cannot support non-String types or collection (multi-valued field) types + return null; } } } diff --git a/solr/core/src/test/org/apache/solr/update/processor/UpsertConditionTest.java b/solr/core/src/test/org/apache/solr/update/processor/UpsertConditionTest.java index 62511ba1f4ac..ad6dee1f1f06 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/UpsertConditionTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/UpsertConditionTest.java @@ -442,6 +442,64 @@ public void givenMultipleShouldClauses_whenMatching() { assertFalse(condition.matches(oldDoc, newDoc)); } + @Test + public void givenMultipleShouldAndMustNotClauses_whenMatching() { + NamedList args = namedList(ImmutableListMultimap.of( + "should", "NEW.field1:*", + "should", "NEW.field2:*", + "must_not", "NEW.field3:*", + "action", "skip" + )); + + UpsertCondition condition = UpsertCondition.parse("skip-it", args); + + assertThat(condition.getName(), is("skip-it")); + + { + SolrInputDocument newDoc = new SolrInputDocument(); + assertFalse(condition.matches(null, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("field3", "anything"); + assertFalse(condition.matches(null, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("field2", "anything"); + assertTrue(condition.matches(null, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("field1", "anything"); + newDoc.setField("field2", "anything-else"); + assertTrue(condition.matches(null, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("field2", "anything"); + newDoc.setField("field3", "stuff"); + assertFalse(condition.matches(null, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("field99", "anything"); + assertFalse(condition.matches(null, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("field99", "anything"); + newDoc.setField("field3", "stuff"); + assertFalse(condition.matches(null, newDoc)); + } + } + @Test public void givenMustAndMustNotClauses_whenMatching() { NamedList args = namedList(ImmutableListMultimap.of( @@ -631,7 +689,6 @@ public void givenUpsertAndNoOldDoc_whenRunning() { assertThat(newDoc.getFieldValue("field"), is("left-alone")); } - @Test public void givenNullify_whenRunning() { NamedList args = namedList(ImmutableListMultimap.of( @@ -658,6 +715,344 @@ public void givenNullify_whenRunning() { assertThat(newDoc.getFieldValue("left-alone"), is("not-null")); } + @Test + public void givenConcat_whenRunning() { + NamedList args = namedList(ImmutableListMultimap.of( + "must_not", "NEW.derived_field:*", + "action", "concat:derived_field:field,other_field" + )); + + UpsertCondition condition = UpsertCondition.parse("concat", args); + + assertThat(condition.getName(), is("concat")); + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("field", "Red"); + newDoc.setField("other_field", "Blue"); + + assertTrue(condition.matches(null, newDoc)); + assertThat(condition.run(null, newDoc), is(UpsertCondition.ActionType.CONCAT)); + + assertThat(newDoc.getFieldValue("derived_field"), is("RedBlue")); + assertThat(newDoc.getFieldValue("field"), is("Red")); + assertThat(newDoc.getFieldValue("other_field"), is("Blue")); + } + + { + SolrInputDocument newDoc2 = new SolrInputDocument(); + newDoc2.setField("derived_field", "AlreadySet"); + assertFalse(condition.matches(null, newDoc2)); + } + } + + @Test + public void givenConcatLowercase_whenRunning() { + NamedList args = namedList(ImmutableListMultimap.of( + "must_not", "NEW.derived_field:*", + "action", "concat_lc:derived_field:field,other_field,maybe_third?" + )); + + UpsertCondition condition = UpsertCondition.parse("concat", args); + + { + SolrInputDocument docWithAllFields = new SolrInputDocument(); + docWithAllFields.setField("field", "Red"); + docWithAllFields.setField("other_field", "Blue"); + docWithAllFields.setField("maybe_third", "Green"); + + assertTrue(condition.matches(null, docWithAllFields)); + assertThat(condition.run(null, docWithAllFields), is(UpsertCondition.ActionType.CONCAT_LC)); + + assertThat(docWithAllFields.getFieldValue("derived_field"), is("redbluegreen")); + assertThat(docWithAllFields.getFieldValue("field"), is("Red")); + assertThat(docWithAllFields.getFieldValue("other_field"), is("Blue")); + assertThat(docWithAllFields.getFieldValue("maybe_third"), is("Green")); + } + + { + SolrInputDocument docAlreadySetsDerived = new SolrInputDocument(); + docAlreadySetsDerived.setField("derived_field", "AlreadySet"); + assertFalse(condition.matches(null, docAlreadySetsDerived)); + } + + { + SolrInputDocument docWithoutOptional = new SolrInputDocument(); + docWithoutOptional.setField("field", "Yellow"); + docWithoutOptional.setField("other_field", "Orange"); + + assertTrue(condition.matches(null, docWithoutOptional)); + assertThat(condition.run(null, docWithoutOptional), is(UpsertCondition.ActionType.CONCAT_LC)); + assertThat(docWithoutOptional.getFieldValue("derived_field"), is("yelloworange")); + } + + { + SolrInputDocument docWithoutRequired = new SolrInputDocument(); + docWithoutRequired.setField("field", "Yellow"); + + assertTrue(condition.matches(null, docWithoutRequired)); + assertThat(condition.run(null, docWithoutRequired), is(UpsertCondition.ActionType.CONCAT_LC)); + assertThat(docWithoutRequired.getFieldValue("derived_field"), nullValue()); + } + } + + @Test + public void givenConcatWithFallbacks_whenRunning() { + NamedList args = namedList(ImmutableListMultimap.of( + "must_not", "NEW.derived_field:*", + "action", "concat:derived_field:maybe_prefix?,main_field|fallback_field|last_resort,maybe_suffix?" + )); + + UpsertCondition condition = UpsertCondition.parse("concat", args); + + { + SolrInputDocument mainPresent = new SolrInputDocument(); + mainPresent.setField("main_field", "Red"); + mainPresent.setField("last_resort", "Blue"); + assertTrue(condition.matches(null, mainPresent)); + assertThat(condition.run(null, mainPresent), is(UpsertCondition.ActionType.CONCAT)); + assertThat(mainPresent.getFieldValue("derived_field"), is("Red")); + } + + { + SolrInputDocument mainAbsent = new SolrInputDocument(); + mainAbsent.setField("fallback_field", "Green"); + mainAbsent.setField("last_resort", "Blue"); + assertTrue(condition.matches(null, mainAbsent)); + assertThat(condition.run(null, mainAbsent), is(UpsertCondition.ActionType.CONCAT)); + assertThat(mainAbsent.getFieldValue("derived_field"), is("Green")); + } + + { + SolrInputDocument onlyLast = new SolrInputDocument(); + onlyLast.setField("last_resort", "Blue"); + assertTrue(condition.matches(null, onlyLast)); + assertThat(condition.run(null, onlyLast), is(UpsertCondition.ActionType.CONCAT)); + assertThat(onlyLast.getFieldValue("derived_field"), is("Blue")); + } + + { + SolrInputDocument preAndPost = new SolrInputDocument(); + preAndPost.setField("fallback_field", "Green"); + preAndPost.setField("maybe_prefix", "pre"); + preAndPost.setField("maybe_suffix", "post"); + assertTrue(condition.matches(null, preAndPost)); + assertThat(condition.run(null, preAndPost), is(UpsertCondition.ActionType.CONCAT)); + assertThat(preAndPost.getFieldValue("derived_field"), is("preGreenpost")); + } + + { + SolrInputDocument onlyPreAndPost = new SolrInputDocument(); + onlyPreAndPost.setField("maybe_prefix", "pre"); + onlyPreAndPost.setField("maybe_suffix", "post"); + assertTrue(condition.matches(null, onlyPreAndPost)); + assertThat(condition.run(null, onlyPreAndPost), is(UpsertCondition.ActionType.CONCAT)); + assertThat(onlyPreAndPost.getFieldValue("derived_field"), nullValue()); + } + } + + @Test + public void givenConcatWithOldDoc_whenRunning() { + NamedList args = namedList(ImmutableListMultimap.of( + "must_not", "NEW.sku:*", + "action", "concat:sku:model_name|product_range,colour,size?" + )); + + UpsertCondition condition = UpsertCondition.parse("concat", args); + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("model_name", "Macbook"); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("sku", "PowerbookSilver"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), is("MacbookSilver")); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("model_name", "Macbook"); + newDoc.setField("sku", "CustomOverride"); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("colour", "Black"); + oldDoc.setField("sku", "PowerbookBlack"); + assertFalse(condition.matches(oldDoc, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("colour", "Grey"); + newDoc.setField("product_range", "Laptop"); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("size", "13in"); + oldDoc.setField("sku", "PowerbookSilver13in"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), is("PowerbookGrey13in")); + // prefers old.model_name to new.product_range fallback + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("product_range", "Laptop"); + newDoc.setField("size", "16in"); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("size", "17in"); + oldDoc.setField("sku", "PowerbookSilver17in"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), is("LaptopSilver16in")); + // fallback to new.product_range since model_name unavailable in old and new + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("colour", "Grey"); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("product_range", "Laptop"); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("size", "17in"); + oldDoc.setField("sku", "PowerbookSilver17in"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), is("LaptopGrey17in")); + // fallback to old.product_range since model_name unavailable in old and new + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("size", "16in"); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("size", "17in"); + oldDoc.setField("sku", "PowerbookSilver17in"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), nullValue()); + // both product_range and model_name unavailable in old and new + } + } + + @Test + public void givenConcatWithAtomicUpdates_whenRunning() { + NamedList args = namedList(ImmutableListMultimap.builder() + .put("should", "NEW.model_name:*") + .put("should", "NEW.product_range:*") + .put("should", "NEW.colour:*") + .put("should", "NEW.size:*") + .put("must_not", "NEW.sku:*") + .put("action", "concat:sku:model_name|product_range,colour,size?") + .build() + ); + + UpsertCondition condition = UpsertCondition.parse("concat", args); + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("model_name", Collections.singletonMap("set", "Macbook")); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("sku", "PowerbookSilver"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), is("MacbookSilver")); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("unrelated_field", Collections.singletonMap("set", "English")); + newDoc.setField("size", Collections.singletonMap("set", "12in")); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("sku", "PowerbookSilver"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), is("PowerbookSilver12in")); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("unrelated_field", Collections.singletonMap("set", "English")); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("sku", "PowerbookSilver"); + assertFalse(condition.matches(oldDoc, newDoc)); + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("size", Collections.singletonMap("set", "12in")); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("sku", "PowerbookSilver"); + assertTrue(condition.matches(oldDoc, newDoc)); + assertThat(condition.run(oldDoc, newDoc), is(UpsertCondition.ActionType.CONCAT)); + assertThat(newDoc.getFieldValue("sku"), nullValue()); + } + } + + @Test + public void givenConcatWithMultipleConditions_whenRunning() { + NamedList args = namedList(ImmutableListMultimap.>builder() + .put("modelBased", namedList(ImmutableListMultimap.of( + "must_not", "NEW.sku:*", + "action", "concat:sku:model_name,colour,size?" + ))) + .put("productBased", namedList(ImmutableListMultimap.of( + "must_not", "NEW.sku:*", + "action", "concat:sku:product_range,colour,size?" + ))) + .build() + ); + + final List conditions = UpsertCondition.readConditions(args); + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("model_name", Collections.singletonMap("set", "Macbook")); + newDoc.setField("product_range", Collections.singletonMap("set", "Laptop")); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("model_name", "Powerbook"); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("sku", "PowerbookSilver"); + assertThat(UpsertCondition.shouldInsertOrUpsert(conditions, oldDoc, newDoc), is(true)); + assertThat(newDoc.getFieldValue("sku"), is("MacbookSilver")); + // does not go on to set sku using product_range because new.sku is set from first condition + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("product_range", Collections.singletonMap("set", "Laptop")); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("sku", "PowerbookSilver"); + assertThat(UpsertCondition.shouldInsertOrUpsert(conditions, oldDoc, newDoc), is(true)); + assertThat(newDoc.getFieldValue("sku"), is("LaptopSilver")); + // first condition is not able to actually set new.sku so second condition matches + } + + { + SolrInputDocument newDoc = new SolrInputDocument(); + newDoc.setField("colour", Collections.singletonMap("set", "Black")); + SolrInputDocument oldDoc = new SolrInputDocument(); + oldDoc.setField("colour", "Silver"); + oldDoc.setField("sku", "PowerbookSilver"); + assertThat(UpsertCondition.shouldInsertOrUpsert(conditions, oldDoc, newDoc), is(true)); + assertThat(newDoc.getFieldValue("sku"), nullValue()); + // neither condition was able to actually set new.sku + } + } + @Test public void givenExistingPermanentDelete_whenCheckingShouldInsertOrUpsert() { List conditions = givenMultipleConditions();