accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jtr...@apache.org
Subject svn commit: r1332678 - in /accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig: AccumuloStorage.java AccumuloWholeRowStorage.java
Date Tue, 01 May 2012 14:55:15 GMT
Author: jtrost
Date: Tue May  1 14:55:15 2012
New Revision: 1332678

URL: http://svn.apache.org/viewvc?rev=1332678&view=rev
Log:
ACCUMULO-142

* replaced the old AccumuloStorage with the newest one from my github account
* added AccumuloWholeRowStorage
* still TODO, test cases...


Added:
    accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloWholeRowStorage.java
Modified:
    accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloStorage.java

Modified: accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloStorage.java
URL: http://svn.apache.org/viewvc/accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloStorage.java?rev=1332678&r1=1332677&r2=1332678&view=diff
==============================================================================
--- accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloStorage.java
(original)
+++ accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloStorage.java
Tue May  1 14:55:15 2012
@@ -51,219 +51,285 @@ import org.apache.pig.data.TupleFactory;
 
 /**
  * A LoadStoreFunc for retrieving data from and storing data to Accumulo
+ *
+ * A Key/Val pair will be returned as tuples: (key, colfam, colqual, colvis, timestamp, value).
All fields except timestamp are DataByteArray, timestamp is a long.
  * 
- * A Key/Val pair will be returned as tuples: (key, colfam, colqual, colvis, timestamp, value).
All fields except timestamp are DataByteArray, timestamp is a
- * long.
- * 
- * Tuples can be written in 2 forms: (key, colfam, colqual, colvis, value) OR (key, colfam,
colqual, value)
+ * Tuples can be written in 2 forms:
+ *  (key, colfam, colqual, colvis, value)
+ *    OR
+ *  (key, colfam, colqual, value)
  * 
  */
-public class AccumuloStorage extends LoadFunc implements StoreFuncInterface {
-  private static final Log logger = LogFactory.getLog(AccumuloStorage.class);
-  
-  private Configuration conf;
-  private RecordReader<Key,Value> reader;
-  private RecordWriter<Text,Mutation> writer;
-  
-  String inst;
-  String zookeepers;
-  String user;
-  String password;
-  String table;
-  Text tableName;
-  String auths;
-  Authorizations authorizations;
-  List<Pair<Text,Text>> columnFamilyColumnQualifierPairs = new LinkedList<Pair<Text,Text>>();
-  
-  String start = null;
-  String end = null;
-  
-  public AccumuloStorage() {}
-  
-  @Override
-  public Tuple getNext() throws IOException {
-    try {
-      // load the next pair
-      if (!reader.nextKeyValue())
-        return null;
-      
-      Key key = (Key) reader.getCurrentKey();
-      Value value = (Value) reader.getCurrentValue();
-      assert key != null && value != null;
-      
-      // and wrap it in a tuple
-      Tuple tuple = TupleFactory.getInstance().newTuple(6);
-      tuple.set(0, new DataByteArray(key.getRow().getBytes()));
-      tuple.set(1, new DataByteArray(key.getColumnFamily().getBytes()));
-      tuple.set(2, new DataByteArray(key.getColumnQualifier().getBytes()));
-      tuple.set(3, new DataByteArray(key.getColumnVisibility().getBytes()));
-      tuple.set(4, new Long(key.getTimestamp()));
-      tuple.set(5, new DataByteArray(value.get()));
-      return tuple;
-    } catch (InterruptedException e) {
-      throw new IOException(e.getMessage());
-    }
-  }
-  
-  @Override
-  public InputFormat getInputFormat() {
-    return new AccumuloInputFormat();
-  }
-  
-  @Override
-  public void prepareToRead(RecordReader reader, PigSplit split) {
-    this.reader = reader;
-  }
-  
-  private void setLocationFromUri(String location) throws IOException {
-    // ex:
-    // accumulo://table1?instance=myinstance&user=root&password=secret&zookeepers=127.0.0.1:2181&auths=PRIVATE,PUBLIC&columns=col1:cq1,col2:cq2&start=abc&end=z
-    String names[];
-    String columns = "";
-    try {
-      if (!location.startsWith("accumulo://"))
-        throw new Exception("Bad scheme.");
-      String[] urlParts = location.split("\\?");
-      if (urlParts.length > 1) {
-        for (String param : urlParts[1].split("&")) {
-          String[] pair = param.split("=");
-          if (pair[0].equals("instance"))
-            inst = pair[1];
-          else if (pair[0].equals("user"))
-            user = pair[1];
-          else if (pair[0].equals("password"))
-            password = pair[1];
-          else if (pair[0].equals("zookeepers"))
-            zookeepers = pair[1];
-          else if (pair[0].equals("auths"))
-            auths = pair[1];
-          else if (pair[0].equals("columns"))
-            columns = pair[1];
-          else if (pair[0].equals("start"))
-            start = pair[1];
-          else if (pair[0].equals("end"))
-            end = pair[1];
-        }
-      }
-      String[] parts = urlParts[0].split("/+");
-      table = parts[1];
-      tableName = new Text(table);
-      
-      if (auths == null || auths.equals("")) {
-        authorizations = new Authorizations();
-      } else {
-        authorizations = new Authorizations(auths.split(","));
-      }
-      
-      if (!columns.equals("")) {
-        for (String cfCq : columns.split(",")) {
-          if (cfCq.contains(":")) {
-            String[] c = cfCq.split(":");
-            columnFamilyColumnQualifierPairs.add(new Pair<Text,Text>(new Text(c[0]),
new Text(c[1])));
-          } else {
-            columnFamilyColumnQualifierPairs.add(new Pair<Text,Text>(new Text(cfCq),
null));
-          }
-        }
-      }
-      
-    } catch (Exception e) {
-      throw new IOException(
-          "Expected 'accumulo://<table>[?instance=<instanceName>&user=<user>&password=<password>&zookeepers=<zookeepers>&auths=<authorizations>&[start=startRow,end=endRow,columns=[cf1:cq1,cf2:cq2,...]]]':
"
-              + e.getMessage());
-    }
-  }
-  
-  @Override
-  public void setLocation(String location, Job job) throws IOException {
-    conf = job.getConfiguration();
-    setLocationFromUri(location);
-    
-    if (!conf.getBoolean(AccumuloInputFormat.class.getSimpleName() + ".configured", false))
{
-      AccumuloInputFormat.setInputInfo(conf, user, password.getBytes(), table, authorizations);
-      AccumuloInputFormat.setZooKeeperInstance(conf, inst, zookeepers);
-      if (columnFamilyColumnQualifierPairs.size() > 0)
-        AccumuloInputFormat.fetchColumns(conf, columnFamilyColumnQualifierPairs);
-      
-      AccumuloInputFormat.setRanges(conf, Collections.singleton(new Range(start, end)));
-    }
-  }
-  
-  @Override
-  public String relativeToAbsolutePath(String location, Path curDir) throws IOException {
-    return location;
-  }
-  
-  @Override
-  public void setUDFContextSignature(String signature) {
+public class AccumuloStorage extends LoadFunc implements StoreFuncInterface
+{
+    private static final Log logger = LogFactory.getLog(AccumuloStorage.class);
+
+    private Configuration conf;
+    private RecordReader<Key, Value> reader;
+    private RecordWriter<Text, Mutation> writer;
     
-  }
-  
-  /* StoreFunc methods */
-  public void setStoreFuncUDFContextSignature(String signature) {
+    String inst;
+    String zookeepers;
+    String user;
+    String password;
+    String table;
+    Text tableName;
+    String auths;
+    Authorizations authorizations;
+    List<Pair<Text, Text>> columnFamilyColumnQualifierPairs = new LinkedList<Pair<Text,Text>>();
     
-  }
-  
-  public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException
{
-    return relativeToAbsolutePath(location, curDir);
-  }
-  
-  public void setStoreLocation(String location, Job job) throws IOException {
-    conf = job.getConfiguration();
-    setLocationFromUri(location);
+    String start = null;
+    String end = null;
+
+    public AccumuloStorage(){}
+
+	@Override
+    public Tuple getNext() throws IOException
+    {
+        try
+        {
+            // load the next pair
+            if (!reader.nextKeyValue())
+                return null;
+            
+            Key key = (Key)reader.getCurrentKey();
+            Value value = (Value)reader.getCurrentValue();
+            assert key != null && value != null;
+            
+            // and wrap it in a tuple
+	        Tuple tuple = TupleFactory.getInstance().newTuple(6);
+            tuple.set(0, new DataByteArray(key.getRow().getBytes()));
+            tuple.set(1, new DataByteArray(key.getColumnFamily().getBytes()));
+            tuple.set(2, new DataByteArray(key.getColumnQualifier().getBytes()));
+            tuple.set(3, new DataByteArray(key.getColumnVisibility().getBytes()));
+            tuple.set(4, new Long(key.getTimestamp()));
+            tuple.set(5, new DataByteArray(value.get()));
+            return tuple;
+        }
+        catch (InterruptedException e)
+        {
+            throw new IOException(e.getMessage());
+        }
+    }    
+
+    @Override
+    public InputFormat getInputFormat()
+    {
+        return new AccumuloInputFormat();
+    }
+
+    @Override
+    public void prepareToRead(RecordReader reader, PigSplit split)
+    {
+        this.reader = reader;
+    }
+
+    private void setLocationFromUri(String location) throws IOException
+    {
+        // ex: accumulo://table1?instance=myinstance&user=root&password=secret&zookeepers=127.0.0.1:2181&auths=PRIVATE,PUBLIC&columns=col1|cq1,col2|cq2&start=abc&end=z
+        String names[];
+        String columns = "";
+        try
+        {
+            if (!location.startsWith("accumulo://"))
+                throw new Exception("Bad scheme.");
+            String[] urlParts = location.split("\\?");
+            if (urlParts.length > 1)
+            {
+                for (String param : urlParts[1].split("&"))
+                {
+                    String[] pair = param.split("=");
+                    if (pair[0].equals("instance"))
+                        inst = pair[1];
+                    else if (pair[0].equals("user"))
+                        user = pair[1];
+                    else if (pair[0].equals("password"))
+                        password = pair[1];
+                    else if (pair[0].equals("zookeepers"))
+                    	zookeepers = pair[1];
+                    else if (pair[0].equals("auths"))
+                    	auths = pair[1];
+                    else if (pair[0].equals("columns"))
+                    	columns = pair[1];
+                    else if (pair[0].equals("start"))
+                    	start = pair[1];
+                    else if (pair[0].equals("end"))
+                    	end = pair[1];
+                }
+            }
+            String[] parts = urlParts[0].split("/+");
+            table = parts[1];
+            tableName = new Text(table);
+            
+            if(auths == null || auths.equals(""))
+            {
+            	authorizations = new Authorizations();
+            }
+            else
+            {
+            	authorizations = new Authorizations(auths.split(","));
+            }
+            
+            if(!columns.equals("")){
+            	for(String cfCq : columns.split(","))
+            	{
+            		if(cfCq.contains("|"))
+            		{
+            			String[] c = cfCq.split("\\|");
+            			columnFamilyColumnQualifierPairs.add(new Pair<Text, Text>(new Text(c[0]),
new Text(c[1])));
+            		}
+            		else
+            		{
+            			columnFamilyColumnQualifierPairs.add(new Pair<Text, Text>(new Text(cfCq),
null));
+            		}
+            	}
+            }
+            	
+        }
+        catch (Exception e)
+        {
+            throw new IOException("Expected 'accumulo://<table>[?instance=<instanceName>&user=<user>&password=<password>&zookeepers=<zookeepers>&auths=<authorizations>&[start=startRow,end=endRow,columns=[cf1|cq1,cf2|cq2,...]]]':
" + e.getMessage());
+        }
+    }
+
+    @Override
+    public void setLocation(String location, Job job) throws IOException
+    {
+        conf = job.getConfiguration();
+        setLocationFromUri(location);
+        
+        if(!conf.getBoolean(AccumuloInputFormat.class.getSimpleName()+".configured", false))
+        {
+        	AccumuloInputFormat.setInputInfo(conf, user, password.getBytes(), table, authorizations);
+            AccumuloInputFormat.setZooKeeperInstance(conf, inst, zookeepers);
+            if(columnFamilyColumnQualifierPairs.size() > 0)
+            	AccumuloInputFormat.fetchColumns(conf, columnFamilyColumnQualifierPairs);
+            
+            AccumuloInputFormat.setRanges(conf, Collections.singleton(new Range(start, end)));
+        }
+    }
+
+    @Override
+    public String relativeToAbsolutePath(String location, Path curDir) throws IOException
+    {
+        return location;
+    }
+
+    @Override
+    public void setUDFContextSignature(String signature)
+    {
+        
+    }
+
+    /* StoreFunc methods */
+    public void setStoreFuncUDFContextSignature(String signature)
+    {
+        
+    }
+
+    public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException
+    {
+        return relativeToAbsolutePath(location, curDir);
+    }
     
-    if (!conf.getBoolean(AccumuloOutputFormat.class.getSimpleName() + ".configured", false))
{
-      AccumuloOutputFormat.setOutputInfo(conf, user, password.getBytes(), true, table);
-      AccumuloOutputFormat.setZooKeeperInstance(conf, inst, zookeepers);
-      AccumuloOutputFormat.setMaxLatency(conf, 10 * 1000);
-      AccumuloOutputFormat.setMaxMutationBufferSize(conf, 10 * 1000 * 1000);
-      AccumuloOutputFormat.setMaxWriteThreads(conf, 10);
-    }
-  }
-  
-  public OutputFormat getOutputFormat() {
-    return new AccumuloOutputFormat();
-  }
-  
-  public void checkSchema(ResourceSchema schema) throws IOException {
-    // we don't care about types, they all get casted to ByteBuffers
-  }
-  
-  public void prepareToWrite(RecordWriter writer) {
-    this.writer = writer;
-  }
-  
-  public void putNext(Tuple t) throws ExecException, IOException {
-    Mutation mut = new Mutation(objToText(t.get(0)));
-    Text cf = objToText(t.get(1));
-    Text cq = objToText(t.get(2));
+    public void setStoreLocation(String location, Job job) throws IOException
+    {
+        conf = job.getConfiguration();
+        setLocationFromUri(location);
+        
+        if(!conf.getBoolean(AccumuloOutputFormat.class.getSimpleName()+".configured", false))
+        {
+        	AccumuloOutputFormat.setOutputInfo(conf, user, password.getBytes(), true, table);
+            AccumuloOutputFormat.setZooKeeperInstance(conf, inst, zookeepers);
+            AccumuloOutputFormat.setMaxLatency(conf, 10*1000);
+            AccumuloOutputFormat.setMaxMutationBufferSize(conf, 10*1000*1000);
+            AccumuloOutputFormat.setMaxWriteThreads(conf, 10);
+        }
+    }
+
+    public OutputFormat getOutputFormat()
+    {
+        return new AccumuloOutputFormat();
+    }
+
+    public void checkSchema(ResourceSchema schema) throws IOException
+    {
+        // we don't care about types, they all get casted to ByteBuffers
+    }
+
+    public void prepareToWrite(RecordWriter writer)
+    {
+        this.writer = writer;
+    }
+
+    public void putNext(Tuple t) throws ExecException, IOException
+    {
+        Mutation mut = new Mutation(objToText(t.get(0)));
+        Text cf = objToText(t.get(1));
+    	Text cq = objToText(t.get(2));
+    	
+        if(t.size() > 4)
+        {
+        	Text cv = objToText(t.get(3));
+        	Value val = new Value(objToBytes(t.get(4)));
+        	if(cv.getLength() == 0)
+        	{
+        		mut.put(cf, cq, val);
+        	}
+        	else
+        	{
+        		mut.put(cf, cq, new ColumnVisibility(cv), val);
+        	}
+        }
+        else
+        {
+        	Value val = new Value(objToBytes(t.get(3)));
+        	mut.put(cf, cq, val);
+        }
+        
+        try {
+			writer.write(tableName, mut);
+		} catch (InterruptedException e) {
+			throw new IOException(e);
+		}
+    }
     
-    if (t.size() > 4) {
-      Text cv = objToText(t.get(3));
-      Value val = new Value(objToBytes(t.get(4)));
-      if (cv.getLength() == 0) {
-        mut.put(cf, cq, val);
-      } else {
-        mut.put(cf, cq, new ColumnVisibility(cv), val);
-      }
-    } else {
-      Value val = new Value(objToBytes(t.get(3)));
-      mut.put(cf, cq, val);
+    private static Text objToText(Object o)
+    {
+    	return new Text(objToBytes(o));
     }
     
-    try {
-      writer.write(tableName, mut);
-    } catch (InterruptedException e) {
-      throw new IOException(e);
-    }
-  }
-  
-  private static Text objToText(Object o) {
-    return new Text(((DataByteArray) o).get());
-  }
-  
-  private static byte[] objToBytes(Object o) {
-    return ((DataByteArray) o).get();
-  }
-  
-  public void cleanupOnFailure(String failure, Job job) {}
+    private static byte[] objToBytes(Object o)
+    {
+    	if (o instanceof String) {
+			String str = (String) o;
+			return str.getBytes();
+		}
+    	else if (o instanceof Long) {
+			Long l = (Long) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Integer) {
+    		Integer l = (Integer) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Boolean) {
+    		Boolean l = (Boolean) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Float) {
+    		Float l = (Float) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Double) {
+    		Double l = (Double) o;
+			return l.toString().getBytes();
+		}
+    	
+    	// TODO: handle DataBag, Map<Object, Object>, and Tuple
+    	
+    	return ((DataByteArray)o).get();
+    }
+
+    public void cleanupOnFailure(String failure, Job job){}
 }

Added: accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloWholeRowStorage.java
URL: http://svn.apache.org/viewvc/accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloWholeRowStorage.java?rev=1332678&view=auto
==============================================================================
--- accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloWholeRowStorage.java
(added)
+++ accumulo/contrib/trunk/pig/src/main/java/org/apache/accumulo/pig/AccumuloWholeRowStorage.java
Tue May  1 14:55:15 2012
@@ -0,0 +1,356 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.accumulo.pig;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.SortedMap;
+
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.WholeRowIterator;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.pig.LoadFunc;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.StoreFuncInterface;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DefaultDataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * A LoadStoreFunc for retrieving data from and storing data to Accumulo
+ *
+ * A Key/Val pair will be returned as tuples: (key, colfam, colqual, colvis, timestamp, value).
All fields except timestamp are DataByteArray, timestamp is a long.
+ * 
+ * Tuples can be written in 2 forms:
+ *  (key, colfam, colqual, colvis, value)
+ *    OR
+ *  (key, colfam, colqual, value)
+ * 
+ */
+public class AccumuloWholeRowStorage extends LoadFunc implements StoreFuncInterface
+{
+    private static final Log LOG = LogFactory.getLog(AccumuloWholeRowStorage.class);
+
+    private Configuration conf;
+    private RecordReader<Key, Value> reader;
+    private RecordWriter<Text, Mutation> writer;
+    
+    String inst;
+    String zookeepers;
+    String user;
+    String password;
+    String table;
+    Text tableName;
+    String auths;
+    Authorizations authorizations;
+    List<Pair<Text, Text>> columnFamilyColumnQualifierPairs = new LinkedList<Pair<Text,Text>>();
+    
+    String start = null;
+    String end = null;
+
+    public AccumuloWholeRowStorage(){}
+
+	@Override
+    public Tuple getNext() throws IOException
+    {
+        try
+        {
+            // load the next pair
+            if (!reader.nextKeyValue())
+                return null;
+            
+            Key key = (Key)reader.getCurrentKey();
+            Value value = (Value)reader.getCurrentValue();
+            assert key != null && value != null;
+            
+            SortedMap<Key, Value> rowKVs =  WholeRowIterator.decodeRow(key, value);
+            List<Tuple> columns = new ArrayList<Tuple>(rowKVs.size());
+            for(Entry<Key, Value> e : rowKVs.entrySet())
+            {
+            	columns.add(columnToTuple(
+            			e.getKey().getColumnFamily(), 
+            			e.getKey().getColumnQualifier(), 
+            			e.getKey().getColumnVisibility(), 
+            			e.getKey().getTimestamp(), 
+            			e.getValue())
+            		);
+            }
+            
+            // and wrap it in a tuple
+	        Tuple tuple = TupleFactory.getInstance().newTuple(2);
+            tuple.set(0, new DataByteArray(key.getRow().getBytes()));
+            tuple.set(1, new DefaultDataBag(columns));
+            
+            return tuple;
+        }
+        catch (InterruptedException e)
+        {
+            throw new IOException(e.getMessage());
+        }
+    }    
+
+	private Tuple columnToTuple(Text colfam, Text colqual, Text colvis, long ts, Value val)
throws IOException
+    {
+        Tuple tuple = TupleFactory.getInstance().newTuple(5);
+        tuple.set(0, new DataByteArray(colfam.getBytes()));
+        tuple.set(1, new DataByteArray(colqual.getBytes()));
+        tuple.set(2, new DataByteArray(colvis.getBytes()));
+        tuple.set(3, new Long(ts));
+        tuple.set(4, new DataByteArray(val.get()));
+        return tuple;
+    }
+	
+    @Override
+    public InputFormat getInputFormat()
+    {
+        return new AccumuloInputFormat();
+    }
+
+    @Override
+    public void prepareToRead(RecordReader reader, PigSplit split)
+    {
+        this.reader = reader;
+    }
+
+    private void setLocationFromUri(String location) throws IOException
+    {
+        // ex: accumulo://table1?instance=myinstance&user=root&password=secret&zookeepers=127.0.0.1:2181&auths=PRIVATE,PUBLIC&columns=col1:cq1,col2:cq2&start=abc&end=z
+        String names[];
+        String columns = "";
+        try
+        {
+            if (!location.startsWith("accumulo://"))
+                throw new Exception("Bad scheme.");
+            String[] urlParts = location.split("\\?");
+            if (urlParts.length > 1)
+            {
+                for (String param : urlParts[1].split("&"))
+                {
+                    String[] pair = param.split("=");
+                    if (pair[0].equals("instance"))
+                        inst = pair[1];
+                    else if (pair[0].equals("user"))
+                        user = pair[1];
+                    else if (pair[0].equals("password"))
+                        password = pair[1];
+                    else if (pair[0].equals("zookeepers"))
+                    	zookeepers = pair[1];
+                    else if (pair[0].equals("auths"))
+                    	auths = pair[1];
+                    else if (pair[0].equals("columns"))
+                    	columns = pair[1];
+                    else if (pair[0].equals("start"))
+                    	start = pair[1];
+                    else if (pair[0].equals("end"))
+                    	end = pair[1];
+                }
+            }
+            String[] parts = urlParts[0].split("/+");
+            table = parts[1];
+            tableName = new Text(table);
+            
+            if(auths == null || auths.equals(""))
+            {
+            	authorizations = new Authorizations();
+            }
+            else
+            {
+            	authorizations = new Authorizations(auths.split(","));
+            }
+            
+            if(!columns.equals("")){
+            	for(String cfCq : columns.split(","))
+            	{
+            		if(cfCq.contains("|"))
+            		{
+            			String[] c = cfCq.split("\\|");
+            			columnFamilyColumnQualifierPairs.add(new Pair<Text, Text>(new Text(c[0]),
new Text(c[1])));
+            		}
+            		else
+            		{
+            			columnFamilyColumnQualifierPairs.add(new Pair<Text, Text>(new Text(cfCq),
null));
+            		}
+            	}
+            }
+            	
+        }
+        catch (Exception e)
+        {
+            throw new IOException("Expected 'accumulo://<table>[?instance=<instanceName>&user=<user>&password=<password>&zookeepers=<zookeepers>&auths=<authorizations>&[start=startRow,end=endRow,columns=[cf1:cq1,cf2:cq2,...]]]':
" + e.getMessage());
+        }
+    }
+
+    @Override
+    public void setLocation(String location, Job job) throws IOException
+    {
+        conf = job.getConfiguration();
+        setLocationFromUri(location);
+        
+        if(!conf.getBoolean(AccumuloInputFormat.class.getSimpleName()+".configured", false))
+        {
+        	AccumuloInputFormat.setInputInfo(conf, user, password.getBytes(), table, authorizations);
+            AccumuloInputFormat.setZooKeeperInstance(conf, inst, zookeepers);
+            if(columnFamilyColumnQualifierPairs.size() > 0)
+            {
+            	LOG.info("columns: "+columnFamilyColumnQualifierPairs);
+            	AccumuloInputFormat.fetchColumns(conf, columnFamilyColumnQualifierPairs);
+            }
+            
+            AccumuloInputFormat.setRanges(conf, Collections.singleton(new Range(start, end)));
+            AccumuloInputFormat.addIterator(conf, new IteratorSetting(10, WholeRowIterator.class));
+        }
+    }
+
+    @Override
+    public String relativeToAbsolutePath(String location, Path curDir) throws IOException
+    {
+        return location;
+    }
+
+    @Override
+    public void setUDFContextSignature(String signature)
+    {
+        
+    }
+
+    /* StoreFunc methods */
+    public void setStoreFuncUDFContextSignature(String signature)
+    {
+        
+    }
+
+    public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException
+    {
+        return relativeToAbsolutePath(location, curDir);
+    }
+    
+    public void setStoreLocation(String location, Job job) throws IOException
+    {
+        conf = job.getConfiguration();
+        setLocationFromUri(location);
+        
+        if(!conf.getBoolean(AccumuloOutputFormat.class.getSimpleName()+".configured", false))
+        {
+        	AccumuloOutputFormat.setOutputInfo(conf, user, password.getBytes(), true, table);
+            AccumuloOutputFormat.setZooKeeperInstance(conf, inst, zookeepers);
+            AccumuloOutputFormat.setMaxLatency(conf, 10*1000);
+            AccumuloOutputFormat.setMaxMutationBufferSize(conf, 10*1000*1000);
+            AccumuloOutputFormat.setMaxWriteThreads(conf, 10);
+        }
+    }
+
+    public OutputFormat getOutputFormat()
+    {
+        return new AccumuloOutputFormat();
+    }
+
+    public void checkSchema(ResourceSchema schema) throws IOException
+    {
+        // we don't care about types, they all get casted to ByteBuffers
+    }
+
+    public void prepareToWrite(RecordWriter writer)
+    {
+        this.writer = writer;
+    }
+
+    public void putNext(Tuple tuple) throws ExecException, IOException
+    {
+        Mutation mut = new Mutation(objToText(tuple.get(0)));
+        DefaultDataBag columns = (DefaultDataBag)tuple.get(1);
+        for(Tuple column : columns)
+        {
+        	Text cf = objToText(column.get(0));
+        	Text cq = objToText(column.get(1));
+        	Text cv = objToText(column.get(2));
+        	Long ts = (Long)column.get(3);
+        	Value val = new Value(objToBytes(column.get(4)));
+        	
+        	mut.put(cf, cq, new ColumnVisibility(cv), ts, val);
+        }
+        
+        try {
+			writer.write(tableName, mut);
+		} catch (InterruptedException e) {
+			throw new IOException(e);
+		}
+    }
+    
+    private static Text objToText(Object o)
+    {
+    	return new Text(objToBytes(o));
+    }
+        
+    private static byte[] objToBytes(Object o)
+    {
+    	if (o instanceof String) {
+			String str = (String) o;
+			return str.getBytes();
+		}
+    	else if (o instanceof Long) {
+			Long l = (Long) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Integer) {
+    		Integer l = (Integer) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Boolean) {
+    		Boolean l = (Boolean) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Float) {
+    		Float l = (Float) o;
+			return l.toString().getBytes();
+		}
+    	else if (o instanceof Double) {
+    		Double l = (Double) o;
+			return l.toString().getBytes();
+		}
+    	
+    	// TODO: handle DataBag, Map<Object, Object>, and Tuple
+    	
+    	return ((DataByteArray)o).get();
+    }
+
+    public void cleanupOnFailure(String failure, Job job){}
+}



Mime
View raw message