Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Improve boolean parsing performance #11351

Merged
merged 1 commit into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087))
- Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528))
- Change error message when per shard document limit is breached ([#11312](https://github.com/opensearch-project/OpenSearch/pull/11312))
- Improve boolean parsing performance ([#11308](https://github.com/opensearch-project/OpenSearch/pull/11308))

### Deprecated

Expand Down
183 changes: 70 additions & 113 deletions libs/common/src/main/java/org/opensearch/common/Booleans.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,30 +45,72 @@ private Booleans() {
/**
* Parses a char[] representation of a boolean value to <code>boolean</code>.
*
* @return <code>true</code> iff the sequence of chars is "true", <code>false</code> iff the sequence of chars is "false" or the
* provided default value iff either text is <code>null</code> or length == 0.
* @return <code>true</code> iff the sequence of chars is "true", <code>false</code> iff the sequence of
* chars is "false" or the provided default value iff either text is <code>null</code> or length == 0.
* @throws IllegalArgumentException if the string cannot be parsed to boolean.
*/
public static boolean parseBoolean(char[] text, int offset, int length, boolean defaultValue) {
if (text == null || length == 0) {
if (text == null) {
return defaultValue;
} else {
return parseBoolean(new String(text, offset, length));
}

switch (length) {
case 0:
return defaultValue;
case 1:
case 2:
case 3:
default:
break;
case 4:
if (text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e') {
return true;
}
break;
case 5:
if (text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e') {
return false;
}
break;
}

throw new IllegalArgumentException(
"Failed to parse value [" + new String(text, offset, length) + "] as only [true] or [false] are allowed."
);
}

/**
* returns true iff the sequence of chars is one of "true","false".
* Returns true iff the sequence of chars is one of "true", "false".
*
* @param text sequence to check
* @param offset offset to start
* @param length length to check
*/
public static boolean isBoolean(char[] text, int offset, int length) {
if (text == null || length == 0) {
if (text == null) {
return false;
}
return isBoolean(new String(text, offset, length));

switch (length) {
case 0:
case 1:
case 2:
case 3:
default:
return false;
case 4:
return text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e';
case 5:
return text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e';
}
}

public static boolean isBoolean(String value) {
Expand All @@ -91,63 +133,45 @@ public static boolean parseBoolean(String value) {
throw new IllegalArgumentException("Failed to parse value [" + value + "] as only [true] or [false] are allowed.");
}

private static boolean hasText(CharSequence str) {
if (str == null || str.length() == 0) {
return false;
}
int strLen = str.length();
for (int i = 0; i < strLen; i++) {
if (!Character.isWhitespace(str.charAt(i))) {
return true;
}
}
return false;
}

/**
* Parses a string representation of a boolean value to <code>boolean</code>.
* Note the subtle difference between this and {@link #parseBoolean(char[], int, int, boolean)}; this returns the
* default value even when the value is non-zero length containing all whitespaces (possibly overlooked, but
* preserving this behavior for compatibility reasons). Use {@link #parseBooleanStrict(String, boolean)} instead.
*
* @param value text to parse.
* @param defaultValue The default value to return if the provided value is <code>null</code>.
* @param defaultValue The default value to return if the provided value is <code>null</code> or blank.
* @return see {@link #parseBoolean(String)}
*/
@Deprecated
public static boolean parseBoolean(String value, boolean defaultValue) {
if (hasText(value)) {
return parseBoolean(value);
}
return defaultValue;
}

public static Boolean parseBoolean(String value, Boolean defaultValue) {
if (hasText(value)) {
return parseBoolean(value);
if (value == null || value.isBlank()) {
return defaultValue;
}
return defaultValue;
return parseBoolean(value);
}

/**
* Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}.
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(String, Boolean)} instead.
*/
@Deprecated
public static Boolean parseBooleanLenient(String value, Boolean defaultValue) {
if (value == null) { // only for the null case we do that here!
public static Boolean parseBoolean(String value, Boolean defaultValue) {
if (value == null || value.isBlank()) {
return defaultValue;
}
return parseBooleanLenient(value, false);
return parseBoolean(value);
}

/**
* Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}.
* Parses a string representation of a boolean value to <code>boolean</code>.
* Analogous to {@link #parseBoolean(char[], int, int, boolean)}.
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(String, boolean)} instead.
* @return <code>true</code> iff the sequence of chars is "true", <code>false</code> iff the sequence of
* chars is "false", or the provided default value iff either text is <code>null</code> or length == 0.
* @throws IllegalArgumentException if the string cannot be parsed to boolean.
*/
@Deprecated
public static boolean parseBooleanLenient(String value, boolean defaultValue) {
if (value == null) {
public static boolean parseBooleanStrict(String value, boolean defaultValue) {
if (value == null || value.length() == 0) {
return defaultValue;
}
return !(value.equals("false") || value.equals("0") || value.equals("off") || value.equals("no"));
return parseBoolean(value);
}

/**
Expand All @@ -163,71 +187,4 @@ public static boolean isFalse(String value) {
public static boolean isTrue(String value) {
return "true".equals(value);
}

/**
* Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}.
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(char[], int, int, boolean)} instead
*/
@Deprecated
public static boolean parseBooleanLenient(char[] text, int offset, int length, boolean defaultValue) {
if (text == null || length == 0) {
return defaultValue;
}
if (length == 1) {
return text[offset] != '0';
}
if (length == 2) {
return !(text[offset] == 'n' && text[offset + 1] == 'o');
}
if (length == 3) {
return !(text[offset] == 'o' && text[offset + 1] == 'f' && text[offset + 2] == 'f');
}
if (length == 5) {
return !(text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e');
}
return true;
}

/**
* returns true if the a sequence of chars is one of "true","false","on","off","yes","no","0","1"
*
* @param text sequence to check
* @param offset offset to start
* @param length length to check
*
* @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #isBoolean(char[], int, int)} instead.
*/
@Deprecated
public static boolean isBooleanLenient(char[] text, int offset, int length) {
if (text == null || length == 0) {
return false;
}
if (length == 1) {
return text[offset] == '0' || text[offset] == '1';
}
if (length == 2) {
return (text[offset] == 'n' && text[offset + 1] == 'o') || (text[offset] == 'o' && text[offset + 1] == 'n');
}
if (length == 3) {
return (text[offset] == 'o' && text[offset + 1] == 'f' && text[offset + 2] == 'f')
|| (text[offset] == 'y' && text[offset + 1] == 'e' && text[offset + 2] == 's');
}
if (length == 4) {
return (text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e');
}
if (length == 5) {
return (text[offset] == 'f'
&& text[offset + 1] == 'a'
&& text[offset + 2] == 'l'
&& text[offset + 3] == 's'
&& text[offset + 4] == 'e');
}
return false;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@

import org.opensearch.test.OpenSearchTestCase;

import java.util.Locale;

import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;

public class BooleansTests extends OpenSearchTestCase {
private static final String[] NON_BOOLEANS = new String[] {
Expand Down Expand Up @@ -81,8 +78,23 @@ public void testParseBooleanWithFallback() {
assertFalse(Booleans.parseBoolean(null, Boolean.FALSE));
assertTrue(Booleans.parseBoolean(null, Boolean.TRUE));

assertFalse(Booleans.parseBoolean("", false));
assertTrue(Booleans.parseBoolean("", true));
assertNull(Booleans.parseBoolean("", null));
assertFalse(Booleans.parseBoolean("", Boolean.FALSE));
assertTrue(Booleans.parseBoolean("", Boolean.TRUE));

assertFalse(Booleans.parseBoolean(" \t\n", false));
assertTrue(Booleans.parseBoolean(" \t\n", true));
assertNull(Booleans.parseBoolean(" \t\n", null));
assertFalse(Booleans.parseBoolean(" \t\n", Boolean.FALSE));
assertTrue(Booleans.parseBoolean(" \t\n", Boolean.TRUE));

assertTrue(Booleans.parseBoolean("true", randomFrom(Boolean.TRUE, Boolean.FALSE, null)));
assertFalse(Booleans.parseBoolean("false", randomFrom(Boolean.TRUE, Boolean.FALSE, null)));

assertTrue(Booleans.parseBoolean(new char[0], 0, 0, true));
assertFalse(Booleans.parseBoolean(new char[0], 0, 0, false));
}

public void testParseNonBooleanWithFallback() {
Expand All @@ -109,56 +121,12 @@ public void testParseNonBoolean() {
}
}

public void testIsBooleanLenient() {
String[] booleans = new String[] { "true", "false", "on", "off", "yes", "no", "0", "1" };
String[] notBooleans = new String[] { "11", "00", "sdfsdfsf", "F", "T" };
assertThat(Booleans.isBooleanLenient(null, 0, 1), is(false));

for (String b : booleans) {
String t = "prefix" + b + "suffix";
assertTrue(
"failed to recognize [" + b + "] as boolean",
Booleans.isBooleanLenient(t.toCharArray(), "prefix".length(), b.length())
);
}

for (String nb : notBooleans) {
String t = "prefix" + nb + "suffix";
assertFalse("recognized [" + nb + "] as boolean", Booleans.isBooleanLenient(t.toCharArray(), "prefix".length(), nb.length()));
}
}

public void testParseBooleanLenient() {
assertThat(Booleans.parseBooleanLenient(randomFrom("true", "on", "yes", "1"), randomBoolean()), is(true));
assertThat(Booleans.parseBooleanLenient(randomFrom("false", "off", "no", "0"), randomBoolean()), is(false));
assertThat(Booleans.parseBooleanLenient(randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT), randomBoolean()), is(true));
assertThat(Booleans.parseBooleanLenient(null, false), is(false));
assertThat(Booleans.parseBooleanLenient(null, true), is(true));

assertThat(
Booleans.parseBooleanLenient(randomFrom("true", "on", "yes", "1"), randomFrom(Boolean.TRUE, Boolean.FALSE, null)),
is(true)
);
assertThat(
Booleans.parseBooleanLenient(randomFrom("false", "off", "no", "0"), randomFrom(Boolean.TRUE, Boolean.FALSE, null)),
is(false)
);
assertThat(
Booleans.parseBooleanLenient(
randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT),
randomFrom(Boolean.TRUE, Boolean.FALSE, null)
),
is(true)
);
assertThat(Booleans.parseBooleanLenient(null, Boolean.FALSE), is(false));
assertThat(Booleans.parseBooleanLenient(null, Boolean.TRUE), is(true));
assertThat(Booleans.parseBooleanLenient(null, null), nullValue());

char[] chars = randomFrom("true", "on", "yes", "1").toCharArray();
assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(true));
chars = randomFrom("false", "off", "no", "0").toCharArray();
assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(false));
chars = randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT).toCharArray();
assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(true));
public void testParseBooleanStrict() {
assertTrue(Booleans.parseBooleanStrict("true", false));
assertFalse(Booleans.parseBooleanStrict("false", true));
assertTrue(Booleans.parseBooleanStrict(null, true));
assertFalse(Booleans.parseBooleanStrict("", false));
expectThrows(IllegalArgumentException.class, () -> Booleans.parseBooleanStrict("foobar", false));
expectThrows(IllegalArgumentException.class, () -> Booleans.parseBooleanStrict(" \t\n", false));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ protected Boolean parseSourceValue(Object value) {
return (Boolean) value;
} else {
String textValue = value.toString();
return Booleans.parseBoolean(textValue.toCharArray(), 0, textValue.length(), false);
return Booleans.parseBooleanStrict(textValue, false);
}
}
};
Expand Down
Loading