diff --git a/build.gradle b/build.gradle index 2654ddd..e8a5d90 100644 --- a/build.gradle +++ b/build.gradle @@ -35,6 +35,10 @@ java { dependencies { compileOnly libs.embulk.spi compileOnly libs.slf4j + + testImplementation platform(libs.junit5.bom) + testImplementation libs.bundles.junit5.implementation + testRuntimeOnly libs.bundles.junit5.runtime } javadoc { @@ -175,6 +179,7 @@ signing { } test { + useJUnitPlatform() testLogging { events "passed", "skipped", "failed", "standardOut", "standardError" exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 70b5fe7..91064a6 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -2,6 +2,24 @@ embulk-spi = "0.11" slf4j-api = "2.0.7" +junit5 = "5.10.0" + [libraries] embulk-spi = { group = "org.embulk", name = "embulk-spi", version.ref = "embulk-spi" } slf4j = { group = "org.slf4j", name = "slf4j-api", version.ref = "slf4j-api" } + +junit5-bom = { group = "org.junit", name = "junit-bom", version.ref = "junit5" } +junit5-api = { group = "org.junit.jupiter", name = "junit-jupiter-api" } +junit5-params = { group = "org.junit.jupiter", name = "junit-jupiter-params" } +junit5-engine = { group = "org.junit.jupiter", name = "junit-jupiter-engine" } + +[bundles] + +junit5-implementation = [ + "junit5-api", + "junit5-params", +] + +junit5-runtime = [ + "junit5-engine", +] diff --git a/src/main/java/org/embulk/util/file/FileLineageFormatter.java b/src/main/java/org/embulk/util/file/FileLineageFormatter.java new file mode 100644 index 0000000..d225f33 --- /dev/null +++ b/src/main/java/org/embulk/util/file/FileLineageFormatter.java @@ -0,0 +1,182 @@ +/* + * Copyright 2021 The Embulk project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.embulk.util.file; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * Formats file lineage metadata, given as {@code Map}, into a {@link java.lang.String} with a template. + * + *

<p>It is a helper to format file lineage metadata into a string to fill into columns in records. + * File lineage metadata is metadata of source files passed from a File Input plugin to Decoder/Parser plugins. + * + *

<p>An example of file lineage metadata looks like the following: + * + *

<pre>{@code {
+ *   "bucket": "example-s3-bucket",
+ *   "objectKey": "foo/barbaz.csv"
+ * }}</pre>
+ * + *

<p>With the template string {@code "s3://{bucket}/{objectKey}"}, the example file lineage metadata is formatted + * into {@code "s3://example-s3-bucket/foo/barbaz.csv"}. + * + *

Patterns and Their Interpretation

+ * + * {@code FileLineageFormatter} uses patterns of the following form: + * + *
 FileLineageFormatterPattern:
+ *         String
+ *         FileLineageFormatterPattern '{'ParameterKey'}' String
+ *
+ * ParameterKey:
+ *         'a' - 'z'
+ *         'A' - 'Z'
+ *         '0' - '9'
+ *         '_'
/**
 * Formats file lineage metadata, given as {@code Map<String, String>}, into a {@link java.lang.String} with a template.
 *
 * <p>For example, the template {@code "s3://{bucket}/{objectKey}"} formats metadata that maps {@code "bucket"} to
 * {@code "example-s3-bucket"} and {@code "objectKey"} to {@code "foo/barbaz.csv"} into
 * {@code "s3://example-s3-bucket/foo/barbaz.csv"}.
 *
 * <p>A template is interpreted as follows:
 *
 * <ul>
 * <li>{@code {key}} is a parameter, replaced by the value mapped to {@code key}. A key may consist only of
 *     {@code 'a'} - {@code 'z'}, {@code 'A'} - {@code 'Z'}, {@code '0'} - {@code '9'}, and {@code '_'}.
 *     An empty key, {@code {}}, is accepted and looks up the empty string.
 * <li>A single quote starts or ends a quoted section and is not copied to the output. An opening brace inside
 *     a quoted section is literal text, so <code>'{'</code> yields <code>{</code>.
 * <li>Two consecutive single quotes outside a parameter yield one literal single quote, whether or not they
 *     appear inside a quoted section.
 * <li>Every other character is copied to the output as it is.
 * </ul>
 */
public final class FileLineageFormatter {
    private FileLineageFormatter(final List<Token> tokens) {
        // The list comes straight from parse() and is not retained anywhere else, so wrapping is sufficient.
        this.tokens = Collections.unmodifiableList(tokens);
    }

    /**
     * Creates a {@link FileLineageFormatter} with a template format.
     *
     * @param format  template format
     * @return formatter
     * @throws IllegalArgumentException  if the specified template format is invalid: a parameter contains a
     *     character other than ASCII letters, digits, and {@code '_'}, or a brace or a quote is left unmatched
     */
    public static FileLineageFormatter from(final String format) {
        return new FileLineageFormatter(parse(format));
    }

    /**
     * Formats a specified file lineage metadata with this formatter.
     *
     * <p>When {@code arguments} do not contain an argument required in the template format, or map it to
     * {@code null}, {@code "(null)"} is filled instead.
     *
     * @param arguments  file lineage metadata, looked up by each parameter key of the template
     * @return formatted string
     */
    public String format(final Map<String, String> arguments) {
        final StringBuilder builder = new StringBuilder();
        for (final Token token : this.tokens) {
            builder.append(token.format(arguments));
        }
        return builder.toString();
    }

    /**
     * Splits a template format into raw-text tokens and parameter tokens, in order of appearance.
     *
     * @param format  template format
     * @return tokens that reproduce the template when formatted one after another
     * @throws IllegalArgumentException  if the template format is invalid
     */
    private static List<Token> parse(final String format) {
        final List<Token> tokens = new ArrayList<>();

        StringBuilder currentToken = new StringBuilder();
        boolean inTemplate = false;
        boolean inQuote = false;

        for (int i = 0; i < format.length(); ++i) {
            final char c = format.charAt(i);
            if (inTemplate) {  // in "{key}"
                if (c == '}') {
                    inTemplate = false;
                    // It intentionally allows an empty parameter "{}".
                    tokens.add(new TemplateToken(currentToken.toString()));
                    currentToken = new StringBuilder();
                } else if (isValidCharAsParameter(c)) {
                    currentToken.append(c);
                } else {
                    throw new IllegalArgumentException("Template parameter in the format contains an invalid char at index: " + i);
                }
            } else {  // out of "{key}"
                if (c == '\'') {
                    if (i + 1 < format.length() && format.charAt(i + 1) == '\'') {
                        // A doubled quote is an escaped literal quote; consume both characters.
                        currentToken.append('\'');
                        ++i;
                    } else {
                        inQuote = !inQuote;
                    }
                } else if (c == '{' && !inQuote) {
                    inTemplate = true;
                    // Flush the raw text collected so far before starting to collect the parameter key.
                    if (currentToken.length() > 0) {
                        tokens.add(new RawToken(currentToken.toString()));
                        currentToken = new StringBuilder();
                    }
                } else {
                    currentToken.append(c);
                }
            }
        }

        if (inTemplate) {
            throw new IllegalArgumentException("Unmatched brace in the format.");
        }
        if (inQuote) {
            throw new IllegalArgumentException("Unmatched quote in the format.");
        }
        if (currentToken.length() > 0) {
            tokens.add(new RawToken(currentToken.toString()));
        }
        return tokens;
    }

    /**
     * Returns {@code true} if {@code c} is valid as a template parameter.
     *
     * <p>It intentionally compares directly with 'a', 'z', 'A', 'Z', '0', '9', and '_' so that it is never affected by locale.
     */
    private static boolean isValidCharAsParameter(final char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_';
    }

    /** A piece of a parsed template that knows how to render itself from the metadata. */
    private abstract static class Token {
        abstract String format(Map<String, String> arguments);
    }

    /** Literal text of the template; rendered as it is, regardless of the metadata. */
    private static final class RawToken extends Token {
        RawToken(final String raw) {
            this.raw = raw;
        }

        @Override
        String format(final Map<String, String> arguments) {
            return this.raw;
        }

        private final String raw;
    }

    /** A {@code {key}} parameter; rendered as the value mapped to the key, or {@code "(null)"} if there is none. */
    private static final class TemplateToken extends Token {
        TemplateToken(final String parameter) {
            this.parameter = parameter;
        }

        @Override
        String format(final Map<String, String> arguments) {
            final String value = arguments.get(this.parameter);
            if (value == null) {
                return "(null)";
            }
            return value;
        }

        private final String parameter;
    }

    private final List<Token> tokens;
}
+ */ + +package org.embulk.util.file; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +import java.util.HashMap; +import org.junit.jupiter.api.Test; + +public class TestFileLineageFormatter { + @Test + public void testEmpty() { + assertFormat("", ""); + } + + @Test + public void testQuote() { + assertFormat("{", "'{'"); + assertFormat("{'}", "'{''}'"); + } + + @Test + public void testSingle() { + assertFormat("(null)", "{foo}", "", "bar"); + assertFormat("bar", "{foo}", "foo", "bar"); + assertFormat("bar", "{}", "", "bar"); + assertFormat("(null)", "{}", "foo", "bar"); + } + + @Test + public void testComplex() { + assertFormat("{bar}{bar}test2", "{foo}{foo}{bar}", "foo", "{bar}", "bar", "test2"); + } + + @Test + public void testInvalidFormats() { + assertInvalidFormat("Unmatched brace in the format.", "{"); + assertInvalidFormat("Unmatched quote in the format.", "'"); + assertInvalidFormat("Unmatched quote in the format.", "'{"); + assertInvalidFormat("Unmatched brace in the format.", "foobar{foo"); + assertInvalidFormat("Template parameter in the format contains an invalid char at index: 7", "foobar{'"); + assertInvalidFormat("Template parameter in the format contains an invalid char at index: 1", "{{}}"); + assertInvalidFormat("Template parameter in the format contains an invalid char at index: 1", "{''}"); + } + + private static void assertInvalidFormat(final String exceptionMessage, final String format) { + try { + FileLineageFormatter.from(format); + } catch (final IllegalArgumentException ex) { + assertEquals(exceptionMessage, ex.getMessage()); + return; + } + fail("IllegalArgumentException is not thrown."); + } + + private static void assertFormat(final String expected, final String format, final String... 
arguments) { + assertEquals(0, arguments.length % 2); + final HashMap argumentsMap = new HashMap<>(); + for (int i = 0; i < arguments.length; i += 2) { + argumentsMap.put(arguments[i], arguments[i + 1]); + } + + final FileLineageFormatter formatter = FileLineageFormatter.from(format); + assertEquals(expected, formatter.format(argumentsMap)); + } +}