hbase-commits mailing list archives

From: st...@apache.org
Subject: svn commit: r834115 - in /hadoop/hbase/branches/0.20: CHANGES.txt bin/add_table.rb
Date: Mon, 09 Nov 2009 15:46:52 GMT
Author: stack
Date: Mon Nov  9 15:46:49 2009
New Revision: 834115

URL: http://svn.apache.org/viewvc?rev=834115&view=rev
Log:
HBASE-1867 Tool to regenerate an hbase table from the data files

Added:
    hadoop/hbase/branches/0.20/bin/add_table.rb
Modified:
    hadoop/hbase/branches/0.20/CHANGES.txt

Modified: hadoop/hbase/branches/0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/CHANGES.txt?rev=834115&r1=834114&r2=834115&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.20/CHANGES.txt Mon Nov  9 15:46:49 2009
@@ -48,6 +48,7 @@
    HBASE-1921  When the Master's session times out and there's only one, cluster is wedged
    HBASE-1947  If HBase starts/stops often in less than 24 hours, 
                you end up with lots of store files
+   HBASE-1867  Tool to regenerate an hbase table from the data files
 
 Release 0.20.1 - Released October 12th, 2009
   INCOMPATIBLE CHANGES

Added: hadoop/hbase/branches/0.20/bin/add_table.rb
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/bin/add_table.rb?rev=834115&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/bin/add_table.rb (added)
+++ hadoop/hbase/branches/0.20/bin/add_table.rb Mon Nov  9 15:46:49 2009
@@ -0,0 +1,123 @@
+# Script adds a table back to a running hbase.
+# Currently only works on a table that has been copied aside.
+# You cannot pass an arbitrary table name yet.
+# 
+# To see usage for this script, run: 
+#
+#  ${HBASE_HOME}/bin/hbase org.jruby.Main add_table.rb
+#
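+# For example (the paths below are illustrative only), if a table's data was
+# copied aside to the hdfs directory /backup/mytable, the following re-adds it
+# under hbase.rootdir as table 'mytable' and regenerates its .META. rows:
+#
+#  ${HBASE_HOME}/bin/hbase org.jruby.Main add_table.rb /backup/mytable
+#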
+include Java
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.HConstants
+import org.apache.hadoop.hbase.HRegionInfo
+import org.apache.hadoop.hbase.client.HTable
+import org.apache.hadoop.hbase.client.Delete
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.client.Scan
+import org.apache.hadoop.hbase.HTableDescriptor
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.util.FSUtils
+import org.apache.hadoop.hbase.util.Writables
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.fs.FileSystem
+import org.apache.commons.logging.LogFactory
+
+# Name of this script
+NAME = "add_table"
+
+# Print usage for this script
+def usage
+  puts 'Usage: %s.rb TABLE_DIR [alternate_tablename]' % NAME
+  exit!
+end
+
+# Get configuration to use.
+c = HBaseConfiguration.new()
+
+# Set hadoop filesystem configuration using the hbase.rootdir.
+# Otherwise, we'll always use localhost even though the hbase.rootdir
+# might be pointing at an hdfs location.
+c.set("fs.default.name", c.get(HConstants::HBASE_DIR))
+fs = FileSystem.get(c)
+
+# Get a logger instance.
+LOG = LogFactory.getLog(NAME)
+
+# Check arguments
+if ARGV.size < 1 || ARGV.size > 2
+  usage
+end
+
+# Get cmdline args.
+srcdir = fs.makeQualified(Path.new(java.lang.String.new(ARGV[0])))
+
+# Get table name
+tableName = nil
+if ARGV.size > 1
+  tableName = ARGV[1]
+  raise IOError.new("Not supported yet")
+else
+  # If none provided, use the dirname
+  tableName = srcdir.getName()
+end
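+# Sanity-check the name (isLegalTableName throws an IllegalArgumentException
+# if the table name is illegal).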
+HTableDescriptor.isLegalTableName(tableName.to_java_bytes)
+
+# Figure locations under hbase.rootdir 
+# Move directories into place; be careful not to overwrite.
+rootdir = FSUtils.getRootDir(c)
+tableDir = fs.makeQualified(Path.new(rootdir, tableName))
+
+# If a directory is currently in place, move it aside.
+if srcdir.equals(tableDir)
+  LOG.info("Source directory is in place under hbase.rootdir: " + srcdir.toString());
+elsif fs.exists(tableDir)
+  movedTableName = tableName + "." + java.lang.System.currentTimeMillis().to_s
+  movedTableDir = Path.new(rootdir, java.lang.String.new(movedTableName))
+  LOG.warn("Moving " + tableDir.toString() + " aside as " + movedTableDir.toString());
+  raise IOError.new("Failed move of " + tableDir.toString()) unless fs.rename(tableDir, movedTableDir)
+  LOG.info("Moving " + srcdir.toString() + " to " + tableDir.toString());
+  raise IOError.new("Failed move of " + srcdir.toString()) unless fs.rename(srcdir, tableDir)
+end
+
+# Clean mentions of table from .META.
+# Scan .META. and remove all rows whose key begins with the table name
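+# (catalog rows are keyed <tablename>,<startkey>,<regionid>, so this table's
+# rows sort together and the scan can stop at the first non-matching key)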
+LOG.info("Deleting mention of " + tableName + " from .META.")
+metaTable = HTable.new(c, HConstants::META_TABLE_NAME)
+scan = Scan.new(tableName.to_java_bytes)
+scanner = metaTable.getScanner(scan)
+# Use java.lang.String when doing compares; Ruby String is a bit odd.
+tableNameStr = java.lang.String.new(tableName)
+while (result = scanner.next())
+  rowid = Bytes.toString(result.getRow())
+  rowidStr = java.lang.String.new(rowid)
+  if not rowidStr.startsWith(tableNameStr)
+    # Gone too far, break
+    break
+  end
+  LOG.info("Deleting row from catalog: " + rowid);
+  d = Delete.new(result.getRow())
+  metaTable.delete(d)
+end
+scanner.close()
+
+# Now, walk the table and per region, add an entry
+LOG.info("Walking " + srcdir.toString() + " adding regions to catalog table")
+statuses = fs.listStatus(srcdir)
+for status in statuses
+  next unless status.isDir()
+  next if status.getPath().getName() == "compaction.dir"
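+  # Each region directory carries a .regioninfo file holding the region's
+  # serialized HRegionInfo.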
+  regioninfofile =  Path.new(status.getPath(), ".regioninfo")
+  unless fs.exists(regioninfofile)
+    LOG.warn("Missing .regioninfo: " + regioninfofile.toString())
+    next
+  end
+  is = fs.open(regioninfofile) 
+  hri = HRegionInfo.new()
+  hri.readFields(is)
+  is.close() 
+  # TODO: Need to redo table descriptor with passed table name and then
+  # recalculate the region encoded names.
+  p = Put.new(hri.getRegionName())
+  p.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
+  metaTable.put(p)
+  LOG.info("Added to catalog: " + hri.toString())
+end


