accumulo-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Billie J Rinaldi <billie.j.rina...@ugov.gov>
Subject Re: Deleting rows from the Java API
Date Thu, 10 May 2012 14:13:20 GMT
On Wednesday, May 9, 2012 1:53:23 PM, "David Medinets" <david.medinets@gmail.com> wrote:
> On 5/9/12, Billie J Rinaldi <billie.j.rinaldi@ugov.gov> wrote:
> > If you want to delete a
> > lot of things and deleteRows won't work for you, consider using a
> > majc scope
> > Filter that filters out what you don't want, compact the table, then
> > remove
> > the filter.
> 
> Is there an example that already does this? Would you consider writing
> one? Providing simple working java code is so very helpful.

Consider the following Java code:

package test;

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.Filter;
import org.apache.hadoop.io.Text;

/**
 * Filter that drops every entry whose key lies in the row range "begin" to "end" and whose
 * column family is "fam2"; all other entries are kept.
 *
 * Intended to be attached temporarily (e.g. at major compaction scope) so that a compaction
 * physically removes the unwanted entries, after which the filter is detached again.
 */
public class RangeColumnRemovalFilter extends Filter {
  // Hard-coded targets for this example; both must match for an entry to be removed.
  private static final Range ROWS_TO_REMOVE = new Range("begin", "end");
  private static final Text FAMILY_TO_REMOVE = new Text("fam2");

  /**
   * Decides whether an entry survives the filter.
   *
   * @param k key of the entry under consideration
   * @param v value of the entry (not inspected)
   * @return false only when the key is inside the target range and its column family is the
   *         target family; true otherwise
   */
  @Override
  public boolean accept(Key k, Value v) {
    // Keys outside the target range are never touched.
    if (!ROWS_TO_REMOVE.contains(k)) {
      return true;
    }
    // Inside the range: keep everything except the targeted column family.
    return !k.getColumnFamily().equals(FAMILY_TO_REMOVE);
  }
}

Of course, if you wanted to make this more configurable, you could pass in the range and column
family as parameters.  Look at the init method of Filter to see how it receives a parameter,
and at the static setNegate method to see how parameters should be set on IteratorSetting objects.

Jar up the RangeColumnRemovalFilter and drop it in the lib/ext directory.  Open the accumulo
shell and type the following commands.

root@instanceName> createtable testtable
root@instanceName testtable> insert alpha fam1 qual1 val1
root@instanceName testtable> insert alpha fam2 qual2 val2
root@instanceName testtable> insert beta fam1 qual1 val1
root@instanceName testtable> insert beta fam2 qual2a val2
root@instanceName testtable> insert beta fam2 qual2b val2
root@instanceName testtable> insert beta fam3 qual3 val3
root@instanceName testtable> insert gamma fam2 qual2 val2
root@instanceName testtable> insert gamma fam3 qual3 val3
root@instanceName testtable> scan
alpha fam1:qual1 []    val1
alpha fam2:qual2 []    val2
beta fam1:qual1 []    val1
beta fam2:qual2a []    val2
beta fam2:qual2b []    val2
beta fam3:qual3 []    val3
gamma fam2:qual2 []    val2
gamma fam3:qual3 []    val3
root@instanceName testtable> setiter -t testtable -scan -majc -minc -p 1 -n rcRemoval -class test.RangeColumnRemovalFilter
Filter accepts or rejects each Key/Value pair
----------> set RangeColumnRemovalFilter parameter negate, default false keeps k/v that
pass accept method, true rejects k/v that pass accept method: 
root@instanceName testtable> compact -t testtable -b begin -e end -w
10 10:07:40,148 [shell.Shell] INFO : Compacting table ...
10 10:07:40,903 [shell.Shell] INFO : Compaction of table testtable completed for given range
root@instanceName testtable> deleteiter -t testtable -scan -majc -minc -n rcRemoval
root@instanceName testtable> scan
alpha fam1:qual1 []    val1
alpha fam2:qual2 []    val2
beta fam1:qual1 []    val1
beta fam3:qual3 []    val3
gamma fam2:qual2 []    val2
gamma fam3:qual3 []    val3
root@instanceName testtable> deletetable testtable
Table: [testtable] has been deleted. 
root@instanceName> 

The following code shows how to apply the RangeColumnRemovalFilter programmatically.  If you
jar it up with the filter and drop it in lib/ext, you just have to type "accumulo test.RangeColumnRemovalFilterTest"
to run it.  You will need to either change the instance name, zookeeper host, username, and
password, or change the code to pull them from the command line.

package test;

import java.util.EnumSet;
import java.util.Map.Entry;

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.ZooKeeperInstance;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

public class RangeColumnRemovalFilterTest {
  public static void main(String[] args) throws Exception {
    Connector conn = new ZooKeeperInstance("instanceName", "zookeeperHost").getConnector("user",
"pass");
    conn.tableOperations().create("tableName");
    
    BatchWriter bw = conn.createBatchWriter("tableName", 200000l, 1000, 1);
    
    Mutation m = new Mutation("alpha"); // before "begin"
    m.put("fam1", "qual1", "val1");
    m.put("fam2", "qual2", "val2");
    bw.addMutation(m);
    
    m = new Mutation("beta"); // between "begin" and "end
    m.put("fam1", "qual1", "val1");
    m.put("fam2", "qual2a", "val2");
    m.put("fam2", "qual2b", "val2");
    m.put("fam3", "qual3", "val3");
    bw.addMutation(m);
    
    m = new Mutation("gamma"); // after "end"
    m.put("fam2", "qual2", "val2");
    m.put("fam3", "qual3", "val3");
    bw.addMutation(m);
    
    bw.close();
    
    System.out.println("Before:");
    for (Entry<Key,Value> entry : conn.createScanner("tableName", new Authorizations()))
{
      System.out.println(entry);
    }
    
    IteratorSetting is = new IteratorSetting(1, "rcRemoval", RangeColumnRemovalFilter.class);
    conn.tableOperations().attachIterator("tableName", is);
    conn.tableOperations().compact("tableName", new Text("begin"), new Text("end"), true,
true);
    conn.tableOperations().removeIterator("tableName", "rcRemoval", EnumSet.allOf(IteratorScope.class));
    
    System.out.println("\nAfter:");
    for (Entry<Key,Value> entry : conn.createScanner("tableName", new Authorizations()))
{
      System.out.println(entry);
    }
    
    conn.tableOperations().delete("tableName"); // remove the table so we can run the test
again
  }
}

Mime
View raw message