hbase-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Jonathan Gray (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (HBASE-3725) HBase increments from old value after delete and write to disk
Date Thu, 28 Apr 2011 17:16:03 GMT

     [ https://issues.apache.org/jira/browse/HBASE-3725?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Jonathan Gray updated HBASE-3725:
---------------------------------

    Attachment: HBASE-3725-v3.patch

This fixes the problem in the only simple way I could think of.

A new configuration option, "hbase.hregion.increment.supportdeletes", is added; it defaults
to true (because it is required for "correctness").

When this option is true, then when the scan against StoreFiles is done, it will also include
the MemStore.  This should ensure correctness for cases where delete markers are present in
the MemStore that need to apply to KVs in the StoreFiles.

I made this a configuration option because it makes increment operations less optimal, so
increment workloads that do not need to support deletes can turn the option off and avoid
the double scan of the MemStore.

A potentially optimal and correct solution to this could be to use the old Get delete tracker
which would retain delete information across files (for in-order file processing rather than
one mega merge).  Some work is going into re-integrating those, so if they do make it back
into HBase, we could utilize them here.

This should suffice for now.

> HBase increments from old value after delete and write to disk
> --------------------------------------------------------------
>
>                 Key: HBASE-3725
>                 URL: https://issues.apache.org/jira/browse/HBASE-3725
>             Project: HBase
>          Issue Type: Bug
>          Components: io, regionserver
>    Affects Versions: 0.90.1
>            Reporter: Nathaniel Cook
>         Attachments: HBASE-3725-Test-v1.patch, HBASE-3725-v3.patch, HBASE-3725.patch
>
>
> Deleted row values are sometimes used for starting points on new increments.
> To reproduce:
> Create a row "r". Set column "x" to some default value.
> Force hbase to write that value to the file system (such as restarting the cluster).
> Delete the row.
> Call table.incrementColumnValue with "some_value"
> Get the row.
> The returned value in the column was incremented from the old value before the row was
> deleted instead of being initialized to "some_value".
> Code to reproduce:
> {code}
> import java.io.IOException;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.hbase.HBaseConfiguration;
> import org.apache.hadoop.hbase.HColumnDescriptor;
> import org.apache.hadoop.hbase.HTableDescriptor;
> import org.apache.hadoop.hbase.client.Delete;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTableInterface;
> import org.apache.hadoop.hbase.client.HTablePool;
> import org.apache.hadoop.hbase.client.Increment;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.util.Bytes;
> public class HBaseTestIncrement
> {
> 	static String tableName  = "testIncrement";
> 	static byte[] infoCF = Bytes.toBytes("info");
> 	static byte[] rowKey = Bytes.toBytes("test-rowKey");
> 	static byte[] newInc = Bytes.toBytes("new");
> 	static byte[] oldInc = Bytes.toBytes("old");
> 	/**
> 	 * This code reproduces a bug with increment column values in hbase
> 	 * Usage: First run part one by passing '1' as the first arg
> 	 *        Then restart the hbase cluster so it writes everything to disk
> 	 *	  Run part two by passing '2' as the first arg
> 	 *
> 	 * This will result in the old deleted data being found and used for the increment calls
> 	 *
> 	 * @param args
> 	 * @throws IOException
> 	 */
> 	public static void main(String[] args) throws IOException
> 	{
> 		if("1".equals(args[0]))
> 			partOne();
> 		if("2".equals(args[0]))
> 			partTwo();
> 		if ("both".equals(args[0]))
> 		{
> 			partOne();
> 			partTwo();
> 		}
> 	}
> 	/**
> 	 * Creates a table and increments a column value 10 times by 10 each time.
> 	 * Results in a value of 100 for the column
> 	 *
> 	 * @throws IOException
> 	 */
> 	static void partOne()throws IOException
> 	{
> 		Configuration conf = HBaseConfiguration.create();
> 		HBaseAdmin admin = new HBaseAdmin(conf);
> 		HTableDescriptor tableDesc = new HTableDescriptor(tableName);
> 		tableDesc.addFamily(new HColumnDescriptor(infoCF));
> 		if(admin.tableExists(tableName))
> 		{
> 			admin.disableTable(tableName);
> 			admin.deleteTable(tableName);
> 		}
> 		admin.createTable(tableDesc);
> 		HTablePool pool = new HTablePool(conf, Integer.MAX_VALUE);
> 		HTableInterface table = pool.getTable(Bytes.toBytes(tableName));
> 		//Increment uninitialized column
> 		for (int j = 0; j < 10; j++)
> 		{
> 			table.incrementColumnValue(rowKey, infoCF, oldInc, (long)10);
> 			Increment inc = new Increment(rowKey);
> 			inc.addColumn(infoCF, newInc, (long)10);
> 			table.increment(inc);
> 		}
> 		Get get = new Get(rowKey);
> 		Result r = table.get(get);
> 		System.out.println("initial values: new " + Bytes.toLong(r.getValue(infoCF, newInc))
+ " old " + Bytes.toLong(r.getValue(infoCF, oldInc)));
> 	}
> 	/**
> 	 * First deletes the data then increments the column 10 times by 1 each time
> 	 *
> 	 * Should result in a value of 10 but it doesn't, it results in a value of 110
> 	 *
> 	 * @throws IOException
> 	 */
> 	static void partTwo()throws IOException
> 	{
> 		Configuration conf = HBaseConfiguration.create();
> 		HTablePool pool = new HTablePool(conf, Integer.MAX_VALUE);
> 		HTableInterface table = pool.getTable(Bytes.toBytes(tableName));
> 		
> 		Delete delete = new Delete(rowKey);
> 		table.delete(delete);
> 		//Increment columns
> 		for (int j = 0; j < 10; j++)
> 		{
> 			table.incrementColumnValue(rowKey, infoCF, oldInc, (long)1);
> 			Increment inc = new Increment(rowKey);
> 			inc.addColumn(infoCF, newInc, (long)1);
> 			table.increment(inc);
> 		}
> 		Get get = new Get(rowKey);
> 		Result r = table.get(get);
> 		System.out.println("after delete values: new " + Bytes.toLong(r.getValue(infoCF, newInc))
+ " old " + Bytes.toLong(r.getValue(infoCF, oldInc)));
> 	}
> }
> {code}

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

Mime
View raw message