Skip to content

Commit

Permalink
Add FileLineageFormatter
Browse files Browse the repository at this point in the history
  • Loading branch information
dmikurube committed Apr 10, 2024
1 parent a59af36 commit 9c5f749
Show file tree
Hide file tree
Showing 4 changed files with 286 additions and 0 deletions.
5 changes: 5 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ java {
dependencies {
compileOnly libs.embulk.spi
compileOnly libs.slf4j

testImplementation platform(libs.junit5.bom)
testImplementation libs.bundles.junit5.implementation
testRuntimeOnly libs.bundles.junit5.runtime
}

javadoc {
Expand Down Expand Up @@ -175,6 +179,7 @@ signing {
}

test {
useJUnitPlatform()
testLogging {
events "passed", "skipped", "failed", "standardOut", "standardError"
exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL
Expand Down
18 changes: 18 additions & 0 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@
embulk-spi = "0.11"
slf4j-api = "2.0.7"

junit5 = "5.10.0"

[libraries]
embulk-spi = { group = "org.embulk", name = "embulk-spi", version.ref = "embulk-spi" }
slf4j = { group = "org.slf4j", name = "slf4j-api", version.ref = "slf4j-api" }

junit5-bom = { group = "org.junit", name = "junit-bom", version.ref = "junit5" }
junit5-api = { group = "org.junit.jupiter", name = "junit-jupiter-api" }
junit5-params = { group = "org.junit.jupiter", name = "junit-jupiter-params" }
junit5-engine = { group = "org.junit.jupiter", name = "junit-jupiter-engine" }

[bundles]

junit5-implementation = [
"junit5-api",
"junit5-params",
]

junit5-runtime = [
"junit5-engine",
]
182 changes: 182 additions & 0 deletions src/main/java/org/embulk/util/file/FileLineageFormatter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
* Copyright 2021 The Embulk project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.embulk.util.file;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
* Formats file lineage metadata, given as {@code Map<String, String>}, into a {@link java.lang.String} with a template.
*
* <p>It is a helper to format file lineage metadata into a string to fill into columns in records.
* File lineage metadata is metadata of source files passed from a File Input plugin to Decoder/Parser plugins.
*
* <p>An example of file lineage metadata is like below:
*
* <pre>{@code {
* "bucket": "example-s3-bucket",
* "objectKey": "foo/barbaz.csv",
* }}</pre>
*
* <p>By a template string {@code "s3://{bucket}/{objectKey}"}, the example file lineage metadata is formatted
* into {@code "s3://example-s3-bucket/foo/barbaz.csv"}.
*
* <h3><a name="patterns">Patterns and Their Interpretation</a></h3>
*
* {@code FileLineageFormatter} uses patterns of the following form:
*
* <blockquote><pre> <i>FileLineageFormatterPattern:</i>
* <i>String</i>
* <i>FileLineageFormatterPattern</i> '{'<i>ParameterKey</i>'}' <i>String</i>
*
* <i>ParameterKey:</i>
* 'a' - 'z'
* 'A' - 'Z'
* '0' - '9'
* '_'</pre></blockquote>
*/
public final class FileLineageFormatter {
private FileLineageFormatter(final ArrayList<Token> tokens) {
this.tokens = Collections.unmodifiableList(tokens);
}

/**
* Creates a {@link FileLineageFormatter} with a template format.
*
* @param format template format
* @return formatter
* @throws IllegalArgumentException if the specified template format is invalid
*/
public static FileLineageFormatter from(final String format) {
return new FileLineageFormatter(parse(format));
}

/**
* Formats a specified file lineage metadata with this formatter.
*
* <p>When {@code arguments} do not contain an argument required in the template format, {@code "(null)"} is filled instead.
*
* @param arguments file lineage metadata
* @return formatted string
*/
public String format(final Map<String, String> arguments) {
final StringBuilder builder = new StringBuilder();
for (final Token token : this.tokens) {
builder.append(token.format(arguments));
}
return builder.toString();
}

private static ArrayList<Token> parse(final String format) {
final ArrayList<Token> tokens = new ArrayList<>();

StringBuilder currentToken = new StringBuilder();
boolean inTemplate = false;
boolean inQuote = false;

for (int i = 0; i < format.length(); ++i) {
final char c = format.charAt(i);
if (inTemplate) { // in "{key}"
if (c == '}') {
inTemplate = false;
// It intentionally allows an empty parameter "{}".
tokens.add(new TemplateToken(currentToken.toString()));
currentToken = new StringBuilder();
} else if (isValidCharAsParameter(c)) {
currentToken.append(c);
} else {
throw new IllegalArgumentException("Template parameter in the format contains an invalid char at index: " + i);
}
} else { // out of "{key}"
if (c == '\'') {
if (i + 1 < format.length() && format.charAt(i + 1) == '\'') {
currentToken.append('\'');
++i;
} else {
inQuote = !inQuote;
}
} else if (c == '{' && !inQuote) {
inTemplate = true;
if (currentToken.length() > 0) {
tokens.add(new RawToken(currentToken.toString()));
currentToken = new StringBuilder();
}
} else {
currentToken.append(c);
}
}
}

if (inTemplate) {
throw new IllegalArgumentException("Unmatched brace in the format.");
}
if (inQuote) {
throw new IllegalArgumentException("Unmatched quote in the format.");
}
if (currentToken.length() > 0) {
tokens.add(new RawToken(currentToken.toString()));
}
return tokens;
}

/**
* Returns {@code true} if {@code c} is valid as a template parameter.
*
* <p>It intentionally compares directly with 'a', 'z', 'A', 'Z', '0', '9', and '_' so that it is never affected by locale.
*/
private static boolean isValidCharAsParameter(final char c) {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_';
}

private abstract static class Token {
abstract String format(Map<String, String> arguments);
}

private static final class RawToken extends Token {
RawToken(final String raw) {
this.raw = raw;
}

@Override
String format(Map<String, String> arguments) {
return this.raw;
}

private final String raw;
}

private static final class TemplateToken extends Token {
TemplateToken(final String parameter) {
this.parameter = parameter;
}

@Override
String format(Map<String, String> arguments) {
final String value = arguments.get(this.parameter);
if (value == null) {
return "(null)";
}
return value;
}

private final String parameter;
}

private final List<Token> tokens;
}
81 changes: 81 additions & 0 deletions src/test/java/org/embulk/util/file/TestFileLineageFormatter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright 2021 The Embulk project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.embulk.util.file;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.HashMap;
import org.junit.jupiter.api.Test;

public class TestFileLineageFormatter {
@Test
public void testEmpty() {
assertFormat("", "");
}

@Test
public void testQuote() {
assertFormat("{", "'{'");
assertFormat("{'}", "'{''}'");
}

@Test
public void testSingle() {
assertFormat("(null)", "{foo}", "", "bar");
assertFormat("bar", "{foo}", "foo", "bar");
assertFormat("bar", "{}", "", "bar");
assertFormat("(null)", "{}", "foo", "bar");
}

@Test
public void testComplex() {
assertFormat("{bar}{bar}test2", "{foo}{foo}{bar}", "foo", "{bar}", "bar", "test2");
}

@Test
public void testInvalidFormats() {
assertInvalidFormat("Unmatched brace in the format.", "{");
assertInvalidFormat("Unmatched quote in the format.", "'");
assertInvalidFormat("Unmatched quote in the format.", "'{");
assertInvalidFormat("Unmatched brace in the format.", "foobar{foo");
assertInvalidFormat("Template parameter in the format contains an invalid char at index: 7", "foobar{'");
assertInvalidFormat("Template parameter in the format contains an invalid char at index: 1", "{{}}");
assertInvalidFormat("Template parameter in the format contains an invalid char at index: 1", "{''}");
}

private static void assertInvalidFormat(final String exceptionMessage, final String format) {
try {
FileLineageFormatter.from(format);
} catch (final IllegalArgumentException ex) {
assertEquals(exceptionMessage, ex.getMessage());
return;
}
fail("IllegalArgumentException is not thrown.");
}

private static void assertFormat(final String expected, final String format, final String... arguments) {
assertEquals(0, arguments.length % 2);
final HashMap<String, String> argumentsMap = new HashMap<>();
for (int i = 0; i < arguments.length; i += 2) {
argumentsMap.put(arguments[i], arguments[i + 1]);
}

final FileLineageFormatter formatter = FileLineageFormatter.from(format);
assertEquals(expected, formatter.format(argumentsMap));
}
}

0 comments on commit 9c5f749

Please sign in to comment.