From ee8cfd57bcbeead6948745529424e02a42be330d Mon Sep 17 00:00:00 2001
From: jruaux <jruaux@gmail.com>
Date: Mon, 8 Feb 2021 22:39:14 -0800
Subject: [PATCH] added record filters

---
 build.gradle                                  |  2 +-
 .../java/com/redislabs/riot/file/TestCsv.java | 15 ++++++++
 .../file/src/test/resources/csv/beers.csv     |  2 +-
 .../resources/csv/import-hmset-filter.txt     |  1 +
 .../test/resources/csv/import-hmset-regex.txt |  1 +
 .../riot/KeyValueProcessingOptions.java       |  7 +++-
 .../riot/processor/FilteringProcessor.java    | 37 +++++++++++++++++++
 docs/processing.adoc                          | 30 +++++++++++----
 docs/riot-file.adoc                           | 12 +++---
 9 files changed, 92 insertions(+), 15 deletions(-)
 create mode 100644 connectors/file/src/test/resources/csv/import-hmset-filter.txt
 create mode 100644 connectors/file/src/test/resources/csv/import-hmset-regex.txt
 create mode 100644 core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java
diff --git a/build.gradle b/build.gradle
index 85455ee5f..1a9182e65 100644
--- a/build.gradle
+++ b/build.gradle
@@ -74,7 +74,7 @@ subprojects {
         apply plugin: 'application'
         application {
             applicationName = 'riot-' + project.name
-        } 
+        }
         dependencies {
             implementation project(':core')
             testImplementation project(':test')    
diff --git a/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java b/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java
index 73630885f..a4b28fc16 100644
--- a/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java
+++ b/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java
@@ -19,6 +19,21 @@ public void importHmset() throws Exception {
 		Assertions.assertEquals(COUNT, keys.size());
 	}
 
+	@Test
+	public void importHmsetFilter() throws Exception {
+		executeFile("/csv/import-hmset-filter.txt");
+		List<String> keys = sync.keys("beer:*");
+		Assertions.assertEquals(424, keys.size());
+	}
+
+	@Test
+	public void importHmsetRegex() throws Exception {
+		executeFile("/csv/import-hmset-regex.txt");
+		Map<String, String> airport1 = sync.hgetall("airport:1");
+		Assertions.assertEquals("Pacific", airport1.get("region"));
+		Assertions.assertEquals("Port_Moresby", airport1.get("city"));
+	}
+
 	@Test
 	public void importGlobHmset() throws Exception {
 		executeFile("/csv/import-glob-hmset.txt");
diff --git a/connectors/file/src/test/resources/csv/beers.csv b/connectors/file/src/test/resources/csv/beers.csv
index cffbe68f9..9567aad68 100644
--- a/connectors/file/src/test/resources/csv/beers.csv
+++ b/connectors/file/src/test/resources/csv/beers.csv
@@ -1,4 +1,4 @@
-,abv,ibu,id,name,style,brewery_id,ounces
+row,abv,ibu,id,name,style,brewery_id,ounces
 0,0.05,,1436,Pub Beer,American Pale Lager,408,12.0
 1,0.066,,2265,Devil's Cup,American Pale Ale (APA),177,12.0
 2,0.071,,2264,Rise of the Phoenix,American IPA,177,12.0
diff --git a/connectors/file/src/test/resources/csv/import-hmset-filter.txt b/connectors/file/src/test/resources/csv/import-hmset-filter.txt
new file mode 100644
index 000000000..8c9f805d7
--- /dev/null
+++ b/connectors/file/src/test/resources/csv/import-hmset-filter.txt
@@ -0,0 +1 @@
+❯ riot-file -h localhost -p 6379 import https://redis-developer.github.io/riot/beers.csv --filter "style == 'American IPA'" --header hmset --keyspace beer --keys id
\ No newline at end of file
diff --git a/connectors/file/src/test/resources/csv/import-hmset-regex.txt b/connectors/file/src/test/resources/csv/import-hmset-regex.txt
new file mode 100644
index 000000000..59dc815dd
--- /dev/null
+++ b/connectors/file/src/test/resources/csv/import-hmset-regex.txt
@@ -0,0 +1 @@
+❯ riot-file -h localhost -p 6379 import https://redis-developer.github.io/riot/airports.csv --regex "Tz=(?<region>\w+)\/(?<city>\w+)" --header hmset --keyspace airport --keys AirportID
\ No newline at end of file
diff --git a/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java b/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java
index a57e5f3d7..67ae7bfc8 100644
--- a/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java
+++ b/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java
@@ -1,6 +1,7 @@
 package com.redislabs.riot;
 
 import com.redislabs.riot.convert.RegexNamedGroupsExtractor;
+import com.redislabs.riot.processor.FilteringProcessor;
 import com.redislabs.riot.processor.MapProcessor;
 import com.redislabs.riot.processor.SpelProcessor;
 import io.lettuce.core.api.StatefulConnection;
@@ -13,11 +14,12 @@
 import java.text.SimpleDateFormat;
 import java.util.*;
 
-@Data
 public class KeyValueProcessingOptions {
 
     @Option(arity = "1..*", names = "--spel", description = "SpEL expression to produce a field", paramLabel = "<f=exp>")
     private Map<String, String> spelFields = new HashMap<>();
+    @Option(arity = "1..*", names = "--filter", description = "SpEL expression to filter records", paramLabel = "<exp>")
+    private List<String> filters = new ArrayList<>();
     @Option(arity = "1..*", names = "--var", description = "Register a variable in the SpEL processor context", paramLabel = "<v=exp>")
     private Map<String, String> variables = new HashMap<>();
     @Option(names = "--date", description = "Processor date format (default: ${DEFAULT-VALUE})", paramLabel = "<string>")
@@ -27,6 +29,9 @@ public class KeyValueProcessingOptions {
 
     public ItemProcessor<Map<String, Object>, Map<String, Object>> processor(StatefulConnection<String, String> connection) {
         List<ItemProcessor<Map<String, Object>, Map<String, Object>>> processors = new ArrayList<>();
+        if (!filters.isEmpty()) {
+            processors.add(new FilteringProcessor(filters));
+        }
         if (!spelFields.isEmpty()) {
             processors.add(new SpelProcessor(connection, new SimpleDateFormat(dateFormat), variables, spelFields));
         }
diff --git a/core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java b/core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java
new file mode 100644
index 000000000..9c2553acc
--- /dev/null
+++ b/core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java
@@ -0,0 +1,37 @@
+package com.redislabs.riot.processor;
+
+import org.springframework.batch.item.ItemProcessor;
+import org.springframework.expression.Expression;
+import org.springframework.expression.spel.standard.SpelExpressionParser;
+import org.springframework.expression.spel.support.StandardEvaluationContext;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+public class FilteringProcessor implements ItemProcessor<Map<String, Object>, Map<String, Object>> {
+
+    private final StandardEvaluationContext context;
+    private final List<Expression> expressions;
+
+    public FilteringProcessor(List<String> filters) {
+        this.context = new StandardEvaluationContext();
+        context.setPropertyAccessors(Collections.singletonList(new MapAccessor()));
+        SpelExpressionParser parser = new SpelExpressionParser();
+        this.expressions = new ArrayList<>();
+        for (String filter : filters) {
+            expressions.add(parser.parseExpression(filter));
+        }
+    }
+
+    @Override
+    public Map<String, Object> process(Map<String, Object> item) throws Exception {
+        for (Expression expression : expressions) {
+            if (Boolean.FALSE.equals(expression.getValue(context, item))) {
+                return null;
+            }
+        }
+        return item;
+    }
+}
diff --git a/docs/processing.adoc b/docs/processing.adoc
index be391dee9..b66fcdc4d 100644
--- a/docs/processing.adoc
+++ b/docs/processing.adoc
@@ -1,15 +1,31 @@
-{app-name} can process records with field expressions.
+The following processors can be applied to records in that order:
 
-You can specify field expressions to produce key/value pairs using the https://docs.spring.io/spring/docs/current/spring-framework-reference/core.html#expressions[Spring Expression Language] (SpEL): `field1=<exp>`, `field2=<exp>`, ...
+* Filters
+* Transforms
+* Regular expressions
 
-For example the expression `--spel field1='  generates a field named `field1` with always the same value `foo`: 
+==== Filter
 
-The input record is accessed through its field names (e.g. `field3=field1+field2`).
+Keep records that match a https://docs.spring.io/spring/docs/current/spring-framework-reference/core.html#expressions[Spring Expression Language] (SpEL) boolean expression
+
+`--filter "name matches '[a-zA-Z\\s]+'"`
+
+==== Transform
+
+Produce key/value pairs using SpEL: `field1=<exp>`, `field2=<exp>`, ...
+
+For example `--spel field1=' generates a field named `field1` with always the same value `foo`.
+
+Input fields are accessed by name (e.g. `field3=field1+field2`).
 
 The processor also exposes the following variables that can be called with the `#` prefix:
 
-* `redis`: Redis connection to issue any command, e.g. `name=#redis.hgetall('person1').lastName`
-* `date`: date parser/formatter, e.g. `epoch=#date.parse(mydate).getTime()`
-* `index`: sequence number of the item being generated, e.g. `id=#index`
+* `redis` : Redis connection to issue any command, e.g. `name=#redis.hgetall('person1').lastName`
+* `date` : date parser/formatter, e.g. `epoch=#date.parse(mydate).getTime()`
+* `index` : sequence number of the item being generated, e.g. `id=#index`
+
+==== Regex
 
+Extract patterns from source fields using regular expressions:
 
+`--regex "name=(?<first>\w+)\/(?<last>\w+)"`
\ No newline at end of file
diff --git a/docs/riot-file.adoc b/docs/riot-file.adoc
index 77106069b..5797c613b 100644
--- a/docs/riot-file.adoc
+++ b/docs/riot-file.adoc
@@ -27,7 +27,7 @@ endif::[]
 
 include::getting_started.adoc[]
 
-== Import
+== Importing
 
 Use the `import` command to import data from files in CSV, fixed-width, JSON, and XML formats.
 
@@ -35,7 +35,6 @@ Use the `import` command to import data from files in CSV, fixed-width, JSON, an
 
 include::import.adoc[]
 
-
 === File Paths
 Paths can include https://man7.org/linux/man-pages/man7/glob.7.html[wildcard patterns].
 
@@ -100,7 +99,6 @@ This creates hashes with keys `beer:1436`, `beer:2265`, ...
 include::{resources}/csv/import-geoadd.txt[]
 ----
 
-
 ==== Fixed-Length
 
 These files have fixed-width fields. The width of each field must be defined using the `--ranges` option.
@@ -199,8 +197,12 @@ Here is a sample XML file that can be imported by {app-name}:
 include::{resources}/xml/import-hmset.txt[]
 ----
 
+=== Processing
+
+include::processing.adoc[]
+
 
-=== Redis
+== Importing Redis Dumps
 
 {app-name} can also import Redis data structure files in JSON or XML formats (see Export -> Redis to generate such files).
 
@@ -224,7 +226,7 @@ These files look like this:
 include::{resources}/json/import.txt[]
 ----
 
-== Export
+== Exporting
 
 Use the `export` command to export data from Redis to files in CSV, fixed-width, JSON, or XML formats. These files can be gzip-compressed as well.