From 7086031c11533f8cd36bbfa1d2ba2f4e9f02a03c Mon Sep 17 00:00:00 2001 From: Wellington Chevreuil Date: Mon, 10 May 2021 11:52:24 +0100 Subject: [PATCH 1/6] HBASE-25874 [hbase-operator-tools]Add tool for identifying 'unknown servers' from master logs, then submit SCPs for each of those. --- hbase-tools/README.md | 31 +++++- .../RegionsOnUnknownServersRecoverer.java | 101 ++++++++++++++++++ 2 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java diff --git a/hbase-tools/README.md b/hbase-tools/README.md index 5bbd0d9177..3fcfd0dbe8 100644 --- a/hbase-tools/README.md +++ b/hbase-tools/README.md @@ -29,7 +29,7 @@ module are: - RegionsMerger; - MissingRegionDirsRepairTool; - +- RegionsOnUnknownServersRecoverer; ## Setup Make sure HBase tools jar is added to HBase classpath: @@ -138,4 +138,31 @@ the affected regions, it copies the entire region dir to a region hfiles to a `HBASE_ROOT_DIR/.missing_dirs_repair/TS/TBL_NAME/bulkload` dir, renaming these files with the pattern `REGION_NAME-FILENAME`. For a given table, all affected regions would then have all its files under same directory for bulkload. _MissingRegionDirsRepairTool_ then uses -_LoadIncrementalHFiles_ to load all files for a given table at once. \ No newline at end of file +_LoadIncrementalHFiles_ to load all files for a given table at once. + +## RegionsOnUnknownServersRecoverer - Tool for recovering regions on "unknown servers." + +_RegionsOnUnknownServersRecoverer_ parses the master log to identify `unknown servers` +holding regions. This condition may happen in the event of recovering previously destroyed clusters, +where new Master/RS names completely differ from the previous ones currently +stored in meta table (see HBASE-24286). + +``` +NOTE: This tool is useful for clusters runing hbase versions lower than 2.2.7, 2.3.5 and 2.4.7. +For any of these versions or higher, HBCK2 'recoverUnknown' option can be used as a much simpler solution. +``` + +### Usage + +This tool requires the master logs path as parameter. Assuming classpath is properly set, can be run as follows: + +``` +$ hbase org.apache.hbase.RegionsOnUnknownServersRecoverer PATH_TO_MASTER_LOGS +``` + + +### Implementation Details + +_RegionsOnUnknownServersRecoverer_ parses master log file searching for specific messages mentioning + "unknown servers". Once "unknown servers" are found, it then uses `HBCK2.scheduleRecoveries` to + submit SCPs for each of these "unknown servers". \ No newline at end of file diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java new file mode 100644 index 0000000000..36b5b3c25a --- /dev/null +++ b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hbase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.util.HashSet; +import java.util.Set; + +/** + * Tool for identifying Unknown Servers from master logs and schedule SCPs for each of those using + * HBCK2 'scheduleRecoveries' option. This is useful for clusters running hbase versions lower than + * 2.2.7, 2.3.5 and 2.4.7. For any of these versions or higher, use HBCK2 'recoverUnknown' option. + */ +public class RegionsOnUnknownServersRecoverer extends Configured implements Tool { + + private static final Logger LOG = + LoggerFactory.getLogger(RegionsOnUnknownServersRecoverer.class.getName()); + + private static final String CATALOG_JANITOR = "CatalogJanitor: hole="; + + private static final String UNKNOWN_SERVER = "unknown_server="; + + private Configuration conf; + + private Set unknownServers = new HashSet<>(); + + public RegionsOnUnknownServersRecoverer(Configuration conf){ + this.conf = conf; + } + + @Override + public int run(String[] args) throws Exception { + if(args.length!=1){ + LOG.error("Wrong number of arguments. " + + "Arguments are: "); + return 1; + } + BufferedReader reader = null; + try(Connection conn = ConnectionFactory.createConnection(conf)) { + reader = new BufferedReader(new FileReader(new File(args[0]))); + String line = null; + while((line = reader.readLine()) != null){ + if(line.contains(CATALOG_JANITOR)){ + String[] servers = line.split(UNKNOWN_SERVER); + for(int i=1; i Date: Tue, 11 May 2021 09:58:47 +0100 Subject: [PATCH 2/6] addressing review comments --- hbase-tools/README.md | 8 ++-- .../RegionsOnUnknownServersRecoverer.java | 39 +++++++++++++------ 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/hbase-tools/README.md b/hbase-tools/README.md index 3fcfd0dbe8..03a32b05e8 100644 --- a/hbase-tools/README.md +++ b/hbase-tools/README.md @@ -142,13 +142,13 @@ _LoadIncrementalHFiles_ to load all files for a given table at once. ## RegionsOnUnknownServersRecoverer - Tool for recovering regions on "unknown servers." -_RegionsOnUnknownServersRecoverer_ parses the master log to identify `unknown servers` +_RegionsOnUnknownServersRecoverer_ parses the master log to identify `unknown servers` holding regions. This condition may happen in the event of recovering previously destroyed clusters, -where new Master/RS names completely differ from the previous ones currently +where new Master/RS names completely differ from the previous ones currently stored in meta table (see HBASE-24286). ``` -NOTE: This tool is useful for clusters runing hbase versions lower than 2.2.7, 2.3.5 and 2.4.7. +NOTE: This tool is useful for clusters runing hbase versions lower than 2.2.7, 2.3.5 and 2.4.7. For any of these versions or higher, HBCK2 'recoverUnknown' option can be used as a much simpler solution. ``` @@ -164,5 +164,5 @@ $ hbase org.apache.hbase.RegionsOnUnknownServersRecoverer PATH_TO_MASTER_LOGS ### Implementation Details _RegionsOnUnknownServersRecoverer_ parses master log file searching for specific messages mentioning - "unknown servers". Once "unknown servers" are found, it then uses `HBCK2.scheduleRecoveries` to + "unknown servers". Once "unknown servers" are found, it then uses `HBCK2.scheduleRecoveries` to submit SCPs for each of these "unknown servers". \ No newline at end of file diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java index 36b5b3c25a..8a117a9375 100644 --- a/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java +++ b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java @@ -17,6 +17,12 @@ */ package org.apache.hbase; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.util.HashSet; +import java.util.Set; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.hbase.HBaseConfiguration; @@ -27,11 +33,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.util.HashSet; -import java.util.Set; /** * Tool for identifying Unknown Servers from master logs and schedule SCPs for each of those using @@ -51,20 +52,28 @@ public class RegionsOnUnknownServersRecoverer extends Configured implements Tool private Set unknownServers = new HashSet<>(); + private boolean dryRun = false; + public RegionsOnUnknownServersRecoverer(Configuration conf){ this.conf = conf; } @Override public int run(String[] args) throws Exception { - if(args.length!=1){ + String logPath = null; + if(args.length>=1 && args.length<3) { + logPath = args[0]; + if(args.length==2) { + dryRun = Boolean.parseBoolean(args[1]); + } + } else { LOG.error("Wrong number of arguments. " - + "Arguments are: "); + + "Arguments are: [dryRun]"); return 1; } BufferedReader reader = null; try(Connection conn = ConnectionFactory.createConnection(conf)) { - reader = new BufferedReader(new FileReader(new File(args[0]))); + reader = new BufferedReader(new FileReader(new File(logPath))); String line = null; while((line = reader.readLine()) != null){ if(line.contains(CATALOG_JANITOR)){ @@ -78,8 +87,17 @@ public int run(String[] args) throws Exception { } } } - HBCK2 hbck2 = new HBCK2(conf); - hbck2.scheduleRecoveries(conn.getHbck(), unknownServers.toArray(new String[]{})); + if(dryRun){ + StringBuilder builder = + new StringBuilder("This is a dry run, no SCPs will be submitted. Found unknown servers:"); + builder.append("\n"); + unknownServers.stream().forEach( s -> builder.append(s).append("\n")); + LOG.info(builder.toString()); + } else { + HBCK2 hbck2 = new HBCK2(conf); + LOG.info("Submitting SCPs for the found unknown servers with HBCK2 scheduleRecoveries option."); + hbck2.scheduleRecoveries(conn.getHbck(), unknownServers.toArray(new String[] {})); + } } catch(Exception e){ LOG.error("Recovering unknown servers failed:", e); return 2; @@ -96,6 +114,5 @@ public static void main(String [] args) throws Exception { System.exit(errCode); } } - } From 58271d74b32635f0ca443a1c8797810963bcf274 Mon Sep 17 00:00:00 2001 From: Wellington Chevreuil Date: Tue, 11 May 2021 14:36:37 +0100 Subject: [PATCH 3/6] checkstyle --- .../org/apache/hbase/RegionsOnUnknownServersRecoverer.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java index 8a117a9375..7911c4eb61 100644 --- a/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java +++ b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java @@ -91,11 +91,12 @@ public int run(String[] args) throws Exception { StringBuilder builder = new StringBuilder("This is a dry run, no SCPs will be submitted. Found unknown servers:"); builder.append("\n"); - unknownServers.stream().forEach( s -> builder.append(s).append("\n")); + unknownServers.stream().forEach(s -> builder.append(s).append("\n")); LOG.info(builder.toString()); } else { HBCK2 hbck2 = new HBCK2(conf); - LOG.info("Submitting SCPs for the found unknown servers with HBCK2 scheduleRecoveries option."); + LOG.info("Submitting SCPs for the found unknown servers with " + + "HBCK2 scheduleRecoveries option."); hbck2.scheduleRecoveries(conn.getHbck(), unknownServers.toArray(new String[] {})); } } catch(Exception e){ From 09f0a92b4330524fd0c52f949b170983f6f23148 Mon Sep 17 00:00:00 2001 From: Wellington Chevreuil Date: Wed, 12 May 2021 13:06:18 +0100 Subject: [PATCH 4/6] more reviews --- hbase-tools/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hbase-tools/README.md b/hbase-tools/README.md index 03a32b05e8..d6b1d72390 100644 --- a/hbase-tools/README.md +++ b/hbase-tools/README.md @@ -154,12 +154,16 @@ For any of these versions or higher, HBCK2 'recoverUnknown' option can be used a ### Usage -This tool requires the master logs path as parameter. Assuming classpath is properly set, can be run as follows: +This tool requires the master logs path as parameter. Assuming classpath is properly set, +can be run as follows: ``` -$ hbase org.apache.hbase.RegionsOnUnknownServersRecoverer PATH_TO_MASTER_LOGS +$ hbase org.apache.hbase.RegionsOnUnknownServersRecoverer PATH_TO_MASTER_LOGS [dryRun] ``` +The `dryRun` optional parameter will just parse the logs and print the list of unknown servers, +without invoking `hbck2 scheduleRecoveries` command. + ### Implementation Details From 1f1c4db78110ae1da12a35554452d5836931aa04 Mon Sep 17 00:00:00 2001 From: Wellington Chevreuil Date: Wed, 12 May 2021 14:42:01 +0100 Subject: [PATCH 5/6] more reviews --- hbase-tools/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-tools/README.md b/hbase-tools/README.md index d6b1d72390..74a55ac2ab 100644 --- a/hbase-tools/README.md +++ b/hbase-tools/README.md @@ -154,7 +154,7 @@ For any of these versions or higher, HBCK2 'recoverUnknown' option can be used a ### Usage -This tool requires the master logs path as parameter. Assuming classpath is properly set, +This tool requires the master logs path as parameter. Assuming classpath is properly set, can be run as follows: ``` From a18968d540f968db06aa8cb5745c15fc6bf26bbb Mon Sep 17 00:00:00 2001 From: Wellington Chevreuil Date: Wed, 12 May 2021 15:35:24 +0100 Subject: [PATCH 6/6] fixing dryRun --- .../java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java index 7911c4eb61..1a83ca29d8 100644 --- a/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java +++ b/hbase-tools/src/main/java/org/apache/hbase/RegionsOnUnknownServersRecoverer.java @@ -64,7 +64,7 @@ public int run(String[] args) throws Exception { if(args.length>=1 && args.length<3) { logPath = args[0]; if(args.length==2) { - dryRun = Boolean.parseBoolean(args[1]); + dryRun = args[1].equals("dryRun"); } } else { LOG.error("Wrong number of arguments. "