From ee8cfd57bcbeead6948745529424e02a42be330d Mon Sep 17 00:00:00 2001 From: jruaux Date: Mon, 8 Feb 2021 22:39:14 -0800 Subject: [PATCH] added record filters --- build.gradle | 2 +- .../java/com/redislabs/riot/file/TestCsv.java | 15 ++++++++ .../file/src/test/resources/csv/beers.csv | 2 +- .../resources/csv/import-hmset-filter.txt | 1 + .../test/resources/csv/import-hmset-regex.txt | 1 + .../riot/KeyValueProcessingOptions.java | 7 +++- .../riot/processor/FilteringProcessor.java | 37 +++++++++++++++++++ docs/processing.adoc | 30 +++++++++++---- docs/riot-file.adoc | 12 +++--- 9 files changed, 92 insertions(+), 15 deletions(-) create mode 100644 connectors/file/src/test/resources/csv/import-hmset-filter.txt create mode 100644 connectors/file/src/test/resources/csv/import-hmset-regex.txt create mode 100644 core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java diff --git a/build.gradle b/build.gradle index 85455ee5f..1a9182e65 100644 --- a/build.gradle +++ b/build.gradle @@ -74,7 +74,7 @@ subprojects { apply plugin: 'application' application { applicationName = 'riot-' + project.name - } + } dependencies { implementation project(':core') testImplementation project(':test') diff --git a/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java b/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java index 73630885f..a4b28fc16 100644 --- a/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java +++ b/connectors/file/src/test/java/com/redislabs/riot/file/TestCsv.java @@ -19,6 +19,21 @@ public void importHmset() throws Exception { Assertions.assertEquals(COUNT, keys.size()); } + @Test + public void importHmsetFilter() throws Exception { + executeFile("/csv/import-hmset-filter.txt"); + List keys = sync.keys("beer:*"); + Assertions.assertEquals(424, keys.size()); + } + + @Test + public void importHmsetRegex() throws Exception { + executeFile("/csv/import-hmset-regex.txt"); + Map airport1 = sync.hgetall("airport:1"); + Assertions.assertEquals("Pacific", airport1.get("region")); + Assertions.assertEquals("Port_Moresby", airport1.get("city")); + } + @Test public void importGlobHmset() throws Exception { executeFile("/csv/import-glob-hmset.txt"); diff --git a/connectors/file/src/test/resources/csv/beers.csv b/connectors/file/src/test/resources/csv/beers.csv index cffbe68f9..9567aad68 100644 --- a/connectors/file/src/test/resources/csv/beers.csv +++ b/connectors/file/src/test/resources/csv/beers.csv @@ -1,4 +1,4 @@ -,abv,ibu,id,name,style,brewery_id,ounces +row,abv,ibu,id,name,style,brewery_id,ounces 0,0.05,,1436,Pub Beer,American Pale Lager,408,12.0 1,0.066,,2265,Devil's Cup,American Pale Ale (APA),177,12.0 2,0.071,,2264,Rise of the Phoenix,American IPA,177,12.0 diff --git a/connectors/file/src/test/resources/csv/import-hmset-filter.txt b/connectors/file/src/test/resources/csv/import-hmset-filter.txt new file mode 100644 index 000000000..8c9f805d7 --- /dev/null +++ b/connectors/file/src/test/resources/csv/import-hmset-filter.txt @@ -0,0 +1 @@ +❯ riot-file -h localhost -p 6379 import https://redis-developer.github.io/riot/beers.csv --filter "style == 'American IPA'" --header hmset --keyspace beer --keys id \ No newline at end of file diff --git a/connectors/file/src/test/resources/csv/import-hmset-regex.txt b/connectors/file/src/test/resources/csv/import-hmset-regex.txt new file mode 100644 index 000000000..59dc815dd --- /dev/null +++ b/connectors/file/src/test/resources/csv/import-hmset-regex.txt @@ -0,0 +1 @@ +❯ riot-file -h localhost -p 6379 import https://redis-developer.github.io/riot/airports.csv --regex "Tz=(?\w+)\/(?\w+)" --header hmset --keyspace airport --keys AirportID \ No newline at end of file diff --git a/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java b/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java index a57e5f3d7..67ae7bfc8 100644 --- a/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java +++ b/core/src/main/java/com/redislabs/riot/KeyValueProcessingOptions.java @@ -1,6 +1,7 @@ package com.redislabs.riot; import com.redislabs.riot.convert.RegexNamedGroupsExtractor; +import com.redislabs.riot.processor.FilteringProcessor; import com.redislabs.riot.processor.MapProcessor; import com.redislabs.riot.processor.SpelProcessor; import io.lettuce.core.api.StatefulConnection; @@ -13,11 +14,12 @@ import java.text.SimpleDateFormat; import java.util.*; -@Data public class KeyValueProcessingOptions { @Option(arity = "1..*", names = "--spel", description = "SpEL expression to produce a field", paramLabel = "") private Map spelFields = new HashMap<>(); + @Option(arity = "1..*", names = "--filter", description = "SpEL expression to filter records", paramLabel = "") + private List filters = new ArrayList<>(); @Option(arity = "1..*", names = "--var", description = "Register a variable in the SpEL processor context", paramLabel = "") private Map variables = new HashMap<>(); @Option(names = "--date", description = "Processor date format (default: ${DEFAULT-VALUE})", paramLabel = "") @@ -27,6 +29,9 @@ public class KeyValueProcessingOptions { public ItemProcessor, Map> processor(StatefulConnection connection) { List, Map>> processors = new ArrayList<>(); + if (!filters.isEmpty()) { + processors.add(new FilteringProcessor(filters)); + } if (!spelFields.isEmpty()) { processors.add(new SpelProcessor(connection, new SimpleDateFormat(dateFormat), variables, spelFields)); } diff --git a/core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java b/core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java new file mode 100644 index 000000000..9c2553acc --- /dev/null +++ b/core/src/main/java/com/redislabs/riot/processor/FilteringProcessor.java @@ -0,0 +1,37 @@ +package com.redislabs.riot.processor; + +import org.springframework.batch.item.ItemProcessor; +import org.springframework.expression.Expression; +import org.springframework.expression.spel.standard.SpelExpressionParser; +import org.springframework.expression.spel.support.StandardEvaluationContext; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class FilteringProcessor implements ItemProcessor, Map> { + + private final StandardEvaluationContext context; + private final List expressions; + + public FilteringProcessor(List filters) { + this.context = new StandardEvaluationContext(); + context.setPropertyAccessors(Collections.singletonList(new MapAccessor())); + SpelExpressionParser parser = new SpelExpressionParser(); + this.expressions = new ArrayList<>(); + for (String filter : filters) { + expressions.add(parser.parseExpression(filter)); + } + } + + @Override + public Map process(Map item) throws Exception { + for (Expression expression : expressions) { + if (Boolean.FALSE.equals(expression.getValue(context, item))) { + return null; + } + } + return item; + } +} diff --git a/docs/processing.adoc b/docs/processing.adoc index be391dee9..b66fcdc4d 100644 --- a/docs/processing.adoc +++ b/docs/processing.adoc @@ -1,15 +1,31 @@ -{app-name} can process records with field expressions. +The following processors can be applied to records in that order: -You can specify field expressions to produce key/value pairs using the https://docs.spring.io/spring/docs/current/spring-framework-reference/core.html#expressions[Spring Expression Language] (SpEL): `field1=`, `field2=`, ... +* Filters +* Transforms +* Regular expressions -For example the expression `--spel field1=' generates a field named `field1` with always the same value `foo`: +==== Filter -The input record is accessed through its field names (e.g. `field3=field1+field2`). +Keep records that match a https://docs.spring.io/spring/docs/current/spring-framework-reference/core.html#expressions[Spring Expression Language] (SpEL) boolean expression + +`--filter "name matches '[a-zA-Z\\s]+'"` + +==== Transform + +Produce key/value pairs using SpEL: `field1=`, `field2=`, ... + +For example `--spel field1=' generates a field named `field1` with always the same value `foo`. + +Input fields are accessed by name (e.g. `field3=field1+field2`). The processor also exposes the following variables that can be called with the `#` prefix: -* `redis`: Redis connection to issue any command, e.g. `name=#redis.hgetall('person1').lastName` -* `date`: date parser/formatter, e.g. `epoch=#date.parse(mydate).getTime()` -* `index`: sequence number of the item being generated, e.g. `id=#index` +* `redis` : Redis connection to issue any command, e.g. `name=#redis.hgetall('person1').lastName` +* `date` : date parser/formatter, e.g. `epoch=#date.parse(mydate).getTime()` +* `index` : sequence number of the item being generated, e.g. `id=#index` + +==== Regex +Extract patterns from source fields using regular expressions: +`--regex "name=(?\w+)\/(?\w+)"` \ No newline at end of file diff --git a/docs/riot-file.adoc b/docs/riot-file.adoc index 77106069b..5797c613b 100644 --- a/docs/riot-file.adoc +++ b/docs/riot-file.adoc @@ -27,7 +27,7 @@ endif::[] include::getting_started.adoc[] -== Import +== Importing Use the `import` command to import data from files in CSV, fixed-width, JSON, and XML formats. @@ -35,7 +35,6 @@ Use the `import` command to import data from files in CSV, fixed-width, JSON, an include::import.adoc[] - === File Paths Paths can include https://man7.org/linux/man-pages/man7/glob.7.html[wildcard patterns]. @@ -100,7 +99,6 @@ This creates hashes with keys `beer:1436`, `beer:2265`, ... include::{resources}/csv/import-geoadd.txt[] ---- - ==== Fixed-Length These files have fixed-width fields. The width of each field must be defined using the `--ranges` option. @@ -199,8 +197,12 @@ Here is a sample XML file that can be imported by {app-name}: include::{resources}/xml/import-hmset.txt[] ---- +=== Processing + +include::processing.adoc[] + -=== Redis +== Importing Redis Dumps {app-name} can also import Redis data structure files in JSON or XML formats (see Export -> Redis to generate such files). @@ -224,7 +226,7 @@ These files look like this: include::{resources}/json/import.txt[] ---- -== Export +== Exporting Use the `export` command to export data from Redis to files in CSV, fixed-width, JSON, or XML formats. These files can be gzip-compressed as well.