From: tedyu@apache.org
To: commits@hbase.apache.org
Subject: hbase git commit: HBASE-15432 TableInputFormat - support multi column family scan (Xuesen Liang)
Date: Sat, 17 Dec 2016 02:05:10 +0000 (UTC)

Repository: hbase
Updated Branches:
  refs/heads/master 49b0bab50 -> 2c107e4d0


HBASE-15432 TableInputFormat - support multi column family scan (Xuesen Liang)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2c107e4d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2c107e4d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2c107e4d

Branch: refs/heads/master
Commit: 2c107e4d081f85aa51057b1e39d9b594ef6f8f7a
Parents: 49b0bab
Author: tedyu
Authored: Fri Dec 16 18:04:55 2016 -0800
Committer: tedyu
Committed: Fri Dec 16 18:04:55 2016 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/mapreduce/CellCounter.java     |  2 +-
 .../apache/hadoop/hbase/mapreduce/Export.java   |  7 ++---
 .../hbase/mapreduce/TableInputFormat.java       |  4 +--
 .../hadoop/hbase/mapreduce/TestCellCounter.java | 12 ++++++-
 .../hbase/mapreduce/TestImportExport.java       | 18 ++++++-----
 .../mapreduce/TestTableInputFormatScanBase.java | 33 +++++++++++---------
 6 files changed, 46 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
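With this change, a MapReduce job can request several column families through
the single hbase.mapreduce.scan.column.family property. A minimal sketch of
the driver-side configuration, assuming the patch is applied (the table and
family names here are illustrative, not part of the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;

public class MultiFamilyScanConfig {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Illustrative table name.
    conf.set(TableInputFormat.INPUT_TABLE, "mytable");
    // Before HBASE-15432 only a single family was honored here; with the
    // patch the value is parsed as a comma-separated list of families.
    conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf1, cf2");
  }
}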
"="); System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "="); System.err.println(" -D " + TableInputFormat.SCAN_COLUMNS + "=\" ...\""); - System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "="); + System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=,, ..."); System.err.println(" -D " + TableInputFormat.SCAN_TIMESTAMP + "="); System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_START + "="); System.err.println(" -D " + TableInputFormat.SCAN_TIMERANGE_END + "="); http://git-wip-us.apache.org/repos/asf/hbase/blob/2c107e4d/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java index 56d229a..3a3988e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java @@ -110,9 +110,8 @@ public class Export extends Configured implements Tool { if (raw) { s.setRaw(raw); } - - if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) { - s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY))); + for (String columnFamily : conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY)) { + s.addFamily(Bytes.toBytes(columnFamily)); } // Set RowFilter or Prefix Filter if applicable. Filter exportFilter = getExportFilter(args); @@ -163,7 +162,7 @@ public class Export extends Configured implements Tool { System.err.println(" -D mapreduce.output.fileoutputformat.compress.type=BLOCK"); System.err.println(" Additionally, the following SCAN properties can be specified"); System.err.println(" to control/limit what is exported.."); - System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "="); + System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=,, ..."); System.err.println(" -D " + RAW_SCAN + "=true"); System.err.println(" -D " + TableInputFormat.SCAN_ROW_START + "="); System.err.println(" -D " + TableInputFormat.SCAN_ROW_STOP + "="); http://git-wip-us.apache.org/repos/asf/hbase/blob/2c107e4d/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java index 7ad68ea..4f6b307 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java @@ -161,8 +161,8 @@ implements Configurable { addColumns(scan, conf.get(SCAN_COLUMNS)); } - if (conf.get(SCAN_COLUMN_FAMILY) != null) { - scan.addFamily(Bytes.toBytes(conf.get(SCAN_COLUMN_FAMILY))); + for (String columnFamily : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) { + scan.addFamily(Bytes.toBytes(columnFamily)); } if (conf.get(SCAN_TIMESTAMP) != null) { http://git-wip-us.apache.org/repos/asf/hbase/blob/2c107e4d/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java index 
http://git-wip-us.apache.org/repos/asf/hbase/blob/2c107e4d/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
index 60e9b33..bdae989 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
@@ -310,7 +310,7 @@ public class TestCellCounter {
   /**
    * Test CellCounter for complete table all data should print to output
    */
-  @Test(timeout = 300000)
+  @Test(timeout = 600000)
   public void testCellCounterForCompleteTable() throws Exception {
     TableName sourceTable = TableName.valueOf("testCellCounterForCompleteTable");
     String outputPath = OUTPUT_DIR + sourceTable;
@@ -346,8 +346,18 @@ public class TestCellCounter {
       assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
       assertTrue(data.contains("row2;a;q_Versions" + "\t" + "1"));
       assertTrue(data.contains("row2;b;q_Versions" + "\t" + "1"));
+
+      FileUtil.fullyDelete(new File(outputPath));
+      args = new String[] { "-D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=a, b",
+          sourceTable.getNameAsString(), outputDir.toString(), ";"};
+      runCount(args);
+      inputStream = new FileInputStream(outputPath + File.separator + "part-r-00000");
+      String data2 = IOUtils.toString(inputStream);
+      inputStream.close();
+      assertEquals(data, data2);
     } finally {
       t.close();
+      localFileSystem.close();
       FileUtil.fullyDelete(new File(outputPath));
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/2c107e4d/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
index 50146fd..bd3efa7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
@@ -572,16 +572,18 @@ public class TestImportExport {
       fail("should be SecurityException");
     } catch (SecurityException e) {
       assertEquals(-1, newSecurityManager.getExitCode());
-      assertTrue(data.toString().contains("Wrong number of arguments:"));
-      assertTrue(data.toString().contains(
+      String errMsg = data.toString();
+      assertTrue(errMsg.contains("Wrong number of arguments:"));
+      assertTrue(errMsg.contains(
           "Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
           "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]"));
-      assertTrue(data.toString().contains("-D hbase.mapreduce.scan.column.family=<family>"));
-      assertTrue(data.toString().contains("-D hbase.mapreduce.include.deleted.rows=true"));
-      assertTrue(data.toString().contains("-Dhbase.client.scanner.caching=100"));
-      assertTrue(data.toString().contains("-Dmapreduce.map.speculative=false"));
-      assertTrue(data.toString().contains("-Dmapreduce.reduce.speculative=false"));
-      assertTrue(data.toString().contains("-Dhbase.export.scanner.batch=10"));
+      assertTrue(
+          errMsg.contains("-D hbase.mapreduce.scan.column.family=<family1>,<family2>, ..."));
+      assertTrue(errMsg.contains("-D hbase.mapreduce.include.deleted.rows=true"));
+      assertTrue(errMsg.contains("-Dhbase.client.scanner.caching=100"));
+      assertTrue(errMsg.contains("-Dmapreduce.map.speculative=false"));
+      assertTrue(errMsg.contains("-Dmapreduce.reduce.speculative=false"));
+      assertTrue(errMsg.contains("-Dhbase.export.scanner.batch=10"));
     } finally {
       System.setErr(oldPrintStream);
       System.setSecurityManager(SECURITY_MANAGER);
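For readers unfamiliar with the pattern, the usage assertions above rely on
redirecting System.err into a buffer before invoking the tool with bad
arguments. A simplified sketch of that capture pattern, with the actual tool
invocation elided (only the redirection and assertion shape are shown):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

public class StderrCaptureSketch {
  public static void main(String[] args) {
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    PrintStream oldPrintStream = System.err;
    System.setErr(new PrintStream(data));
    try {
      // The test would run the Export tool with wrong arguments here, so its
      // usage text lands in the redirected stderr buffer.
      System.err.println("Wrong number of arguments: 0");
    } finally {
      // Always restore the original stream, exactly as the test does.
      System.setErr(oldPrintStream);
    }
    String errMsg = data.toString();
    // The test then asserts on substrings of errMsg, e.g.:
    System.out.println(errMsg.contains("Wrong number of arguments:"));
  }
}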
http://git-wip-us.apache.org/repos/asf/hbase/blob/2c107e4d/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
index db5b57a..0f49333 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatScanBase.java
@@ -65,7 +65,7 @@ public abstract class TestTableInputFormatScanBase {
   static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   static final TableName TABLE_NAME = TableName.valueOf("scantest");
-  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
+  static final byte[][] INPUT_FAMILYS = {Bytes.toBytes("content1"), Bytes.toBytes("content2")};
   static final String KEY_STARTROW = "startRow";
   static final String KEY_LASTROW = "stpRow";
@@ -83,8 +83,8 @@ public abstract class TestTableInputFormatScanBase {
     // start mini hbase cluster
     TEST_UTIL.startMiniCluster(3);
     // create and fill table
-    table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, INPUT_FAMILY);
-    TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
+    table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, INPUT_FAMILYS);
+    TEST_UTIL.loadTable(table, INPUT_FAMILYS, null, false);
   }
 
   @AfterClass
@@ -110,21 +110,23 @@ public abstract class TestTableInputFormatScanBase {
     public void map(ImmutableBytesWritable key, Result value, Context context)
         throws IOException, InterruptedException {
-      if (value.size() != 1) {
-        throw new IOException("There should only be one input column");
+      if (value.size() != 2) {
+        throw new IOException("There should be two input columns");
       }
-      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
-        cf = value.getMap();
-      if(!cf.containsKey(INPUT_FAMILY)) {
+      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
+        cfMap = value.getMap();
+
+      if (!cfMap.containsKey(INPUT_FAMILYS[0]) || !cfMap.containsKey(INPUT_FAMILYS[1])) {
         throw new IOException("Wrong input columns. Missing: '" +
-          Bytes.toString(INPUT_FAMILY) + "'.");
+          Bytes.toString(INPUT_FAMILYS[0]) + "' or '" + Bytes.toString(INPUT_FAMILYS[1]) + "'.");
       }
-      String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
+
+      String val0 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[0], null));
+      String val1 = Bytes.toStringBinary(value.getValue(INPUT_FAMILYS[1], null));
       LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
-        ", value -> " + val);
+        ", value -> (" + val0 + ", " + val1 + ")");
       context.write(key, key);
     }
-
   }

   /**
@@ -181,7 +183,8 @@ public abstract class TestTableInputFormatScanBase {
       "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
     c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
-    c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
+    c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILYS[0]) + ", " +
+      Bytes.toString(INPUT_FAMILYS[1]));
     c.set(KEY_STARTROW, start != null ? start : "");
     c.set(KEY_LASTROW, last != null ? last : "");
last : ""); @@ -219,7 +222,8 @@ public abstract class TestTableInputFormatScanBase { LOG.info("Before map/reduce startup - job " + jobName); Configuration c = new Configuration(TEST_UTIL.getConfiguration()); Scan scan = new Scan(); - scan.addFamily(INPUT_FAMILY); + scan.addFamily(INPUT_FAMILYS[0]); + scan.addFamily(INPUT_FAMILYS[1]); if (start != null) { scan.setStartRow(Bytes.toBytes(start)); } @@ -256,7 +260,8 @@ public abstract class TestTableInputFormatScanBase { LOG.info("Before map/reduce startup - job " + jobName); Configuration c = new Configuration(TEST_UTIL.getConfiguration()); Scan scan = new Scan(); - scan.addFamily(INPUT_FAMILY); + scan.addFamily(INPUT_FAMILYS[0]); + scan.addFamily(INPUT_FAMILYS[1]); c.set("hbase.mapreduce.input.autobalance", "true"); c.set("hbase.mapreduce.input.autobalance.maxskewratio", ratio); c.set(KEY_STARTROW, "");