From a4af0cccbd0a6cccf8637d2c8539e8639fbe1ab8 Mon Sep 17 00:00:00 2001 From: Nihal Jain Date: Wed, 6 Sep 2023 22:51:28 +0530 Subject: [PATCH] HBASE-27961 Running assigns/unassigns command with large number of files/regions throws CallTimeoutException (#135) Signed-off-by: Peter Somogyi --- hbase-hbck2/README.md | 23 ++- .../src/main/java/org/apache/hbase/HBCK2.java | 139 +++++++++++++++--- .../test/java/org/apache/hbase/TestHBCK2.java | 32 ++++ .../hbase/TestHBCKCommandLineParsing.java | 10 ++ 4 files changed, 182 insertions(+), 22 deletions(-) diff --git a/hbase-hbck2/README.md b/hbase-hbck2/README.md index b5c14edee7..35e39693e2 100644 --- a/hbase-hbck2/README.md +++ b/hbase-hbck2/README.md @@ -146,6 +146,7 @@ Command: Options: -o,--override override ownership by another procedure -i,--inputFiles take one or more files of encoded region names + -b,--batchSize number of regions to process in a batch A 'raw' assign that can be used even during Master initialization (if the -skip flag is specified). Skirts Coprocessors. Pass one or more encoded region names. 1588230740 is the hard-coded name for the @@ -156,6 +157,12 @@ Command: If -i or --inputFiles is specified, pass one or more input file names. Each file contains encoded region names, one per line. For example: $ HBCK2 assigns -i fileName1 fileName2 + If -b or --batchSize is specified, the command processes those many + regions at a time in a batch-ed manner; Consider using this option, + if the list of regions is huge, to avoid CallTimeoutException. + For example: + $ HBCK2 assigns -i fileName1 fileName2 -b 500 + By default, batchSize is set to -1 i.e. no batching is done. bypass [OPTIONS] [...|-i ...] Options: @@ -163,6 +170,7 @@ Command: -r,--recursive bypass parent and its children. SLOW! EXPENSIVE! -w,--lockWait milliseconds to wait before giving up; default=1 -i,--inputFiles take one or more input files of PID's + -b,--batchSize number of procedures to process in a batch Pass one (or more) procedure 'pid's to skip to procedure finish. Parent of bypassed procedure will also be skipped to the finish. Entities will be left in an inconsistent state and will require manual fixup. May @@ -173,6 +181,12 @@ Command: If -i or --inputFiles is specified, pass one or more input file names. Each file contains PID's, one per line. For example: $ HBCK2 bypass -i fileName1 fileName2 + If -b or --batchSize is specified, the command processes those many + procedures at a time in a batch-ed manner; Consider using this option, + if the list of procedures is huge, to avoid CallTimeoutException. + For example: + $ HBCK2 bypass -i fileName1 fileName2 -b 500 + By default, batchSize is set to -1 i.e. no batching is done. extraRegionsInMeta [...| -i ...] @@ -385,10 +399,11 @@ Command: Each file contains , one per line. For example: $ HBCK2 scheduleRecoveries -i fileName1 fileName2 - unassigns [...|-i ...] + unassigns [OPTIONS] [...|-i ...] Options: -o,--override override ownership by another procedure -i,--inputFiles take one or more input files of encoded region names + -b,--batchSize number of regions to process in a batch A 'raw' unassign that can be used even during Master initialization (if the -skip flag is specified). Skirts Coprocessors. Pass one or more encoded region names. 1588230740 is the hard-coded name for the @@ -402,6 +417,12 @@ Command: If -i or --inputFiles is specified, pass one or more input file names. Each file contains encoded region names, one per line. For example: $ HBCK2 unassigns -i fileName1 fileName2 + If -b or --batchSize is specified, the tool processes those many + regions at a time in a batch-ed manner; Consider using this option, + if the list of regions is huge, to avoid CallTimeoutException. + For example: + $ HBCK2 unassigns -i fileName1 fileName2 -b 500 + By default, batchSize is set to -1 i.e. no batching is done. ``` Note that when you pass `bin/hbase` the `hbck` argument, it will by diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java index 15c89d1b22..b9c7db3a3f 100644 --- a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java +++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java @@ -136,6 +136,17 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool { */ private static final long DEFAULT_LOCK_WAIT = 1; + /** + * Value which represents no batching. + */ + private static final int NO_BATCH_SIZE = -1; + + /** + * Number of batches to process in a single call. By default, it is set to -1, that is no batching + * would be done. + */ + private static final int DEFAULT_BATCH_SIZE = NO_BATCH_SIZE; + /** * Check for HBCK support. Expects created connection. * @param supportedVersions list of zero or more supported versions. @@ -450,42 +461,80 @@ List>> addMissingRegionsInMetaForTables(String... nameSpaceO } List assigns(Hbck hbck, String[] args) throws IOException { + // Init Options options = new Options(); Option override = Option.builder("o").longOpt("override").build(); Option inputFile = Option.builder("i").longOpt("inputFiles").build(); + Option batchOpt = Option.builder("b").longOpt("batchSize").hasArg().type(Integer.class).build(); options.addOption(override); options.addOption(inputFile); - // Parse command-line. + options.addOption(batchOpt); + + // Parse command-line CommandLine commandLine = getCommandLine(args, options); if (commandLine == null) { return null; } + + int batchSize = getBatchSize(batchOpt, commandLine); boolean overrideFlag = commandLine.hasOption(override.getOpt()); boolean inputFileFlag = commandLine.hasOption(inputFile.getOpt()); - List argList = commandLine.getArgList(); - return hbck.assigns(getFromArgsOrFiles(argList, inputFileFlag), overrideFlag); + + List regionList = getFromArgsOrFiles(commandLine.getArgList(), inputFileFlag); + + // Process here + if (batchSize == NO_BATCH_SIZE) { + return hbck.assigns(regionList, overrideFlag); + } else { + List pidList = new ArrayList<>(regionList.size()); + final List> batch = Lists.partition(regionList, batchSize); + for (int i = 0; i < batch.size(); i++) { + LOG.info("Processing batch #{}", i + 1); + pidList.addAll(hbck.assigns(batch.get(i), overrideFlag)); + } + return pidList; + } } List unassigns(Hbck hbck, String[] args) throws IOException { + // Init Options options = new Options(); Option override = Option.builder("o").longOpt("override").build(); Option inputFile = Option.builder("i").longOpt("inputFiles").build(); + Option batchOpt = Option.builder("b").longOpt("batchSize").hasArg().type(Integer.class).build(); options.addOption(override); options.addOption(inputFile); - // Parse command-line. + options.addOption(batchOpt); + + // Parse command-line CommandLine commandLine = getCommandLine(args, options); if (commandLine == null) { return null; } + boolean overrideFlag = commandLine.hasOption(override.getOpt()); boolean inputFileFlag = commandLine.hasOption(inputFile.getOpt()); - List argList = commandLine.getArgList(); - return hbck.unassigns(getFromArgsOrFiles(argList, inputFileFlag), overrideFlag); + int batchSize = getBatchSize(batchOpt, commandLine); + + List regionList = getFromArgsOrFiles(commandLine.getArgList(), inputFileFlag); + + // Process here + if (batchSize == NO_BATCH_SIZE) { + return hbck.unassigns(regionList, overrideFlag); + } else { + List pidList = new ArrayList<>(regionList.size()); + final List> batch = Lists.partition(regionList, batchSize); + for (int i = 0; i < batch.size(); i++) { + LOG.info("Processing batch #{}", i + 1); + pidList.addAll(hbck.unassigns(batch.get(i), overrideFlag)); + } + return pidList; + } } /** Returns List of results OR null if failed to run. */ List bypass(String[] args) throws IOException { - // Bypass has two options.... + // Init Options options = new Options(); // See usage for 'help' on these options. Option override = Option.builder("o").longOpt("override").build(); @@ -496,7 +545,10 @@ List bypass(String[] args) throws IOException { options.addOption(wait); Option inputFile = Option.builder("i").longOpt("inputFiles").build(); options.addOption(inputFile); - // Parse command-line. + Option batchOpt = Option.builder("b").longOpt("batchSize").hasArg().type(Integer.class).build(); + options.addOption(batchOpt); + + // Parse command-line CommandLine commandLine = getCommandLine(args, options); if (commandLine == null) { return null; @@ -508,6 +560,7 @@ List bypass(String[] args) throws IOException { boolean overrideFlag = commandLine.hasOption(override.getOpt()); boolean recursiveFlag = commandLine.hasOption(recursive.getOpt()); boolean inputFileFlag = commandLine.hasOption(inputFile.getOpt()); + int batchSize = getBatchSize(batchOpt, commandLine); String[] pidStrs = getFromArgsOrFiles(commandLine.getArgList(), inputFileFlag).toArray(new String[0]); @@ -517,9 +570,21 @@ List bypass(String[] args) throws IOException { } List pids = Arrays.stream(pidStrs).map(Long::valueOf).collect(Collectors.toList()); + // Process here try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { checkFunctionSupported(connection, BYPASS); - return hbck.bypassProcedure(pids, lockWait, overrideFlag, recursiveFlag); + if (batchSize == NO_BATCH_SIZE) { + return hbck.bypassProcedure(pids, lockWait, overrideFlag, recursiveFlag); + } else { + List statusList = new ArrayList<>(pids.size()); + final List> batch = Lists.partition(pids, batchSize); + for (int i = 0; i < batch.size(); i++) { + LOG.info("Processing batch #{}", i + 1); + statusList + .addAll(hbck.bypassProcedure(batch.get(i), lockWait, overrideFlag, recursiveFlag)); + } + return statusList; + } } } @@ -663,16 +728,23 @@ private static void usageAssigns(PrintWriter writer) { writer.println(" Options:"); writer.println(" -o,--override override ownership by another procedure"); writer.println(" -i,--inputFiles take one or more files of encoded region names"); + writer.println(" -b,--batchSize number of regions to process in a batch"); writer.println(" A 'raw' assign that can be used even during Master initialization (if"); writer.println(" the -skip flag is specified). Skirts Coprocessors. Pass one or more"); writer.println(" encoded region names. 1588230740 is the hard-coded name for the"); writer.println(" hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example of"); writer.println(" what a user-space encoded region name looks like. For example:"); - writer.println(" $ HBCK2 assigns 1588230740 de00010733901a05f5a2a3a382e27dd4"); + writer.println(" $ HBCK2 " + ASSIGNS + " 1588230740 de00010733901a05f5a2a3a382e27dd4"); writer.println(" Returns the pid(s) of the created AssignProcedure(s) or -1 if none."); writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); writer.println(" Each file contains encoded region names, one per line. For example:"); - writer.println(" $ HBCK2 assigns -i fileName1 fileName2"); + writer.println(" $ HBCK2 " + ASSIGNS + " -i fileName1 fileName2"); + writer.println(" If -b or --batchSize is specified, the command processes those many"); + writer.println(" regions at a time in a batch-ed manner; Consider using this option,"); + writer.println(" if the list of regions is huge, to avoid CallTimeoutException."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + ASSIGNS + " -i fileName1 fileName2 -b 500"); + writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); } private static void usageBypass(PrintWriter writer) { @@ -682,6 +754,7 @@ private static void usageBypass(PrintWriter writer) { writer.println(" -r,--recursive bypass parent and its children. SLOW! EXPENSIVE!"); writer.println(" -w,--lockWait milliseconds to wait before giving up; default=1"); writer.println(" -i,--inputFiles take one or more input files of PID's"); + writer.println(" -b,--batchSize number of procedures to process in a batch"); writer.println(" Pass one (or more) procedure 'pid's to skip to procedure finish. Parent"); writer.println(" of bypassed procedure will also be skipped to the finish. Entities will"); writer.println(" be left in an inconsistent state and will require manual fixup. May"); @@ -692,6 +765,12 @@ private static void usageBypass(PrintWriter writer) { writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); writer.println(" Each file contains PID's, one per line. For example:"); writer.println(" $ HBCK2 " + BYPASS + " -i fileName1 fileName2"); + writer.println(" If -b or --batchSize is specified, the command processes those many"); + writer.println(" procedures at a time in a batch-ed manner; Consider using this option,"); + writer.println(" if the list of procedures is huge, to avoid CallTimeoutException."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + BYPASS + " -i fileName1 fileName2 -b 500"); + writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); } private static void usageFilesystem(PrintWriter writer) { @@ -909,16 +988,17 @@ private static void usageRecoverUnknown(PrintWriter writer) { } private static void usageUnassigns(PrintWriter writer) { - writer.println(" " + UNASSIGNS + " [...|-i ...]"); + writer.println(" " + UNASSIGNS + " [OPTIONS] [...|-i ...]"); writer.println(" Options:"); writer.println(" -o,--override override ownership by another procedure"); writer.println(" -i,--inputFiles take one or more input files of encoded region names"); + writer.println(" -b,--batchSize number of regions to process in a batch"); writer.println(" A 'raw' unassign that can be used even during Master initialization"); writer.println(" (if the -skip flag is specified). Skirts Coprocessors. Pass one or"); writer.println(" more encoded region names. 1588230740 is the hard-coded name for the"); writer.println(" hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example"); writer.println(" of what a userspace encoded region name looks like. For example:"); - writer.println(" $ HBCK2 unassigns 1588230740 de00010733901a05f5a2a3a382e27dd4"); + writer.println(" $ HBCK2 " + UNASSIGNS + " 1588230740 de00010733901a05f5a2a3a382e27dd4"); writer.println(" Returns the pid(s) of the created UnassignProcedure(s) or -1 if none."); writer.println(); writer.println(" SEE ALSO, org.apache.hbase.hbck1.OfflineMetaRepair, the offline"); @@ -926,6 +1006,12 @@ private static void usageUnassigns(PrintWriter writer) { writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); writer.println(" Each file contains encoded region names, one per line. For example:"); writer.println(" $ HBCK2 " + UNASSIGNS + " -i fileName1 fileName2"); + writer.println(" If -b or --batchSize is specified, the tool processes those many"); + writer.println(" regions at a time in a batch-ed manner; Consider using this option,"); + writer.println(" if the list of regions is huge, to avoid CallTimeoutException."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + UNASSIGNS + " -i fileName1 fileName2 -b 500"); + writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); } private static void usageRegioninfoMismatch(PrintWriter writer) { @@ -1364,7 +1450,7 @@ private String[] formatSetRegionStateCommand(String[] commands) { } if (replicaId > 0) { System.out - .println("Change state for replica reigon " + replicaId + " for primary region " + region); + .println("Change state for replica region " + replicaId + " for primary region " + region); } return new String[] { region, replicaId.toString(), state.name() }; @@ -1411,13 +1497,6 @@ private CommandLine parseCommandWithInputList(String[] args, Options options) { return getCommandLine(args, options); } - private Pair> parseAndGetCommandLineWithInputOption(String[] args, - Options options) throws IOException { - CommandLine commandLine = parseCommandWithInputList(args, options); - List params = getFromArgsOrFiles(commandLine.getArgList(), commandLine.hasOption("i")); - return Pair.newPair(commandLine, params); - } - private Pair> parseCommandWithFixAndInputOptions(String[] args) throws IOException { Options options = new Options(); @@ -1466,4 +1545,22 @@ private List getFromFiles(List args) throws IOException { } return argList; } + + static int getBatchSize(Option batchOpt, CommandLine commandLine) + throws IllegalArgumentException { + int batchSize = DEFAULT_BATCH_SIZE; + try { + if (commandLine.hasOption(batchOpt.getOpt())) { + batchSize = Integer.parseInt(commandLine.getOptionValue(batchOpt.getOpt())); + if (batchSize <= 0) { + throw new IllegalArgumentException("Batch size should be greater than 0!"); + } + } + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Batch size should be an integer!"); + } + LOG.info("Batch size set to: " + batchSize); + return batchSize; + } + } diff --git a/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCK2.java b/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCK2.java index 3cedc64036..cce6749239 100644 --- a/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCK2.java +++ b/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCK2.java @@ -63,6 +63,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; +import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser; +import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; +import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; +import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; + /** * Tests commands. For command-line parsing, see adjacent test. * @see TestHBCKCommandLineParsing @@ -770,4 +776,30 @@ public void testByPassWithInputFiles() throws IOException { assertFalse(rs); } } + + @Test(expected = IllegalArgumentException.class) + public void testParsingOfBatchSizeWithInvalidDatatype() throws Exception { + String value = "2A"; + parseAndGetBatchSize(value); + } + + @Test(expected = IllegalArgumentException.class) + public void testParsingOfBatchSizeWithInvalidValue() throws Exception { + parseAndGetBatchSize("-2"); + } + + @Test + public void testParsingOfBatchSize() throws Exception { + assertEquals(200, parseAndGetBatchSize("200")); + } + + private int parseAndGetBatchSize(String value) throws ParseException { + Options options = new Options(); + Option batchOpt = Option.builder("b").longOpt("batchSize").hasArg().type(Integer.class).build(); + options.addOption(batchOpt); + + CommandLine commandLine = + new DefaultParser().parse(options, new String[] { "-b", value }, false); + return HBCK2.getBatchSize(batchOpt, commandLine); + } } diff --git a/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCKCommandLineParsing.java b/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCKCommandLineParsing.java index adf720c54f..71bc97b9f6 100644 --- a/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCKCommandLineParsing.java +++ b/hbase-hbck2/src/test/java/org/apache/hbase/TestHBCKCommandLineParsing.java @@ -365,4 +365,14 @@ private String retrieveOptionOutput(String[] options) throws IOException { System.setOut(oldOut); return os.toString(); } + + @Test(expected = IllegalArgumentException.class) + public void testAssignsParsingOfBatchSizeWithInvalidDatatype() throws Exception { + retrieveOptionOutput(new String[] { "assigns", "-b", "2A", "r1", "r2", "r3" }); + } + + @Test(expected = IllegalArgumentException.class) + public void testUnassignsParsingOfBatchSizeWithInvalidValue() throws Exception { + retrieveOptionOutput(new String[] { "unassigns", "-b", "-2", "r1", "r2", "r3" }); + } }