forked from opensearch-project/opensearch-spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Unquote text and identifiers in PPL parsing (opensearch-project#393)
* unquote text and identifiers in PPL parsing Signed-off-by: Sean Kao <[email protected]> * clean PPL suite comments Signed-off-by: Sean Kao <[email protected]> * fix PPL suite typo Signed-off-by: Sean Kao <[email protected]> * parameterize test cases Signed-off-by: Sean Kao <[email protected]> * add UT for StringUtils Signed-off-by: Sean Kao <[email protected]> * use JUnit 4 Signed-off-by: Sean Kao <[email protected]> --------- Signed-off-by: Sean Kao <[email protected]>
- Loading branch information
1 parent
6779d5e
commit a4126b8
Showing
10 changed files
with
260 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
ppl-spark-integration/src/main/java/org/opensearch/sql/common/utils/StringUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.common.utils; | ||
|
||
import com.google.common.base.Strings; | ||
|
||
import java.util.IllegalFormatException; | ||
import java.util.Locale; | ||
|
||
/**
 * String helpers used while parsing queries: stripping quotes from text literals and identifiers,
 * and locale-independent formatting.
 */
public class StringUtils {
  /**
   * Removes the outer pair of quotes from a quoted text and resolves the escapes inside it.
   *
   * <p>Rules, in order:
   *
   * <ul>
   *   <li>{@code null} and texts shorter than two characters are returned unchanged.
   *   <li>Texts whose first and last characters differ are returned unchanged.
   *   <li>Backtick-quoted texts only lose the outer backticks; no escapes are resolved.
   *   <li>Texts quoted by {@code '} or {@code "} lose the outer quotes; inside them a doubled
   *       enclosing quote collapses to a single one, and a backslash followed by {@code "},
   *       {@code '} or a backslash yields the second character.
   *   <li>Any other text is returned unchanged.
   * </ul>
   *
   * <p>Example: {@code 'Test''s'} becomes {@code Test's}.
   *
   * @param text possibly quoted text, may be {@code null}
   * @return the text without its outer quotes and with escapes resolved, or the input itself when
   *     it is not quoted
   */
  public static String unquoteText(String text) {
    if (text == null || text.length() < 2) {
      return text;
    }

    final int lastIndex = text.length() - 1;
    final char firstChar = text.charAt(0);
    if (firstChar != text.charAt(lastIndex)) {
      return text;
    }

    if (firstChar == '`') {
      return text.substring(1, lastIndex);
    }
    if (firstChar != '\'' && firstChar != '"') {
      return text;
    }

    final char enclosingQuote = firstChar;
    final StringBuilder unquoted = new StringBuilder(text.length() - 2);

    // Walk only the characters between the outer quotes. Note that the one-character lookahead
    // of the last inner character is the closing quote itself.
    for (int index = 1; index < lastIndex; index++) {
      char currentChar = text.charAt(index);
      final char nextChar = text.charAt(index + 1);

      final boolean backslashEscape =
          currentChar == '\\' && (nextChar == '"' || nextChar == '\\' || nextChar == '\'');
      final boolean doubledQuote = currentChar == enclosingQuote && nextChar == enclosingQuote;

      if (backslashEscape || doubledQuote) {
        // Drop the escaping character and emit the escaped one instead.
        index++;
        currentChar = nextChar;
      }
      unquoted.append(currentChar);
    }
    return unquoted.toString();
  }

  /**
   * Removes the outer pair of backticks from an identifier.
   *
   * <p>A lone backtick is not a quoted identifier and is returned unchanged, as are {@code null},
   * the empty string and identifiers that do not both start and end with a backtick.
   *
   * @param identifier identifier that is possibly enclosed by backticks, may be {@code null}
   * @return the identifier without its outer backticks, or the input itself when it is not quoted
   */
  public static String unquoteIdentifier(String identifier) {
    if (isQuoted(identifier, "`")) {
      return identifier.substring(1, identifier.length() - 1);
    } else {
      return identifier;
    }
  }

  /**
   * Returns a formatted string using the specified format string and arguments, as well as the
   * {@link Locale#ROOT} locale.
   *
   * @param format format string
   * @param args arguments referenced by the format specifiers in the format string
   * @return A formatted string
   * @throws IllegalFormatException If a format string contains an illegal syntax, a format
   *     specifier that is incompatible with the given arguments, insufficient arguments given the
   *     format string, or other illegal conditions.
   * @see String#format(Locale, String, Object...)
   */
  public static String format(final String format, Object... args) {
    return String.format(Locale.ROOT, format, args);
  }

  /**
   * Tells whether {@code text} both starts and ends with {@code mark}, using two distinct
   * occurrences of it.
   *
   * <p>The length check prevents a text consisting of a single mark from counting as quoted,
   * which would make the caller's {@code substring(1, length - 1)} fail.
   */
  private static boolean isQuoted(String text, String mark) {
    return text != null
        && text.length() >= 2 * mark.length()
        && text.startsWith(mark)
        && text.endsWith(mark);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.