incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cr...@apache.org
Subject [29/45] git commit: Blur indexing via Hive now works. Warning it may be fragile.
Date Sun, 26 Oct 2014 17:55:27 GMT
Blur indexing via Hive now works.  Warning it may be fragile.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/b1d2b57a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/b1d2b57a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/b1d2b57a

Branch: refs/heads/blur-384-random-port-cleanup
Commit: b1d2b57ac0f907a6426805c46f46606c765af4bd
Parents: 3a41d16
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Wed Oct 8 10:29:38 2014 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Wed Oct 8 10:29:38 2014 -0400

----------------------------------------------------------------------
 .../mapreduce/lib/GenericBlurRecordWriter.java  |   4 +-
 contrib/blur-hive/pom.xml                       |   8 +-
 .../blur/hive/BlurHiveOutputCommitter.java      |  71 +++++++++
 .../apache/blur/hive/BlurHiveOutputFormat.java  |   7 +-
 .../apache/blur/hive/BlurHiveParitioner.java    |  69 +++++++++
 .../blur/hive/BlurHiveStorageHandler.java       |  18 +++
 .../blur/hive/BlurObjectInspectorGenerator.java |   4 +-
 .../java/org/apache/blur/hive/BlurSerDe.java    |   9 +-
 .../org/apache/blur/hive/BlurSerializer.java    | 153 ++++++++++++++++++-
 .../java/org/apache/blur/hive/CreateData.java   |  51 +++++++
 .../java/org/apache/blur/hive/RunHiveTest.java  |  27 ++++
 contrib/blur-hive/src/test/java/test.hive       |  74 +++++++++
 12 files changed, 485 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/blur-mapred-hadoop2/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop2/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java b/blur-mapred-hadoop2/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java
index 06e9bc2..1077440 100644
--- a/blur-mapred-hadoop2/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java
+++ b/blur-mapred-hadoop2/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java
@@ -26,7 +26,7 @@ import org.apache.blur.analysis.FieldManager;
 import org.apache.blur.log.Log;
 import org.apache.blur.log.LogFactory;
 import org.apache.blur.lucene.LuceneVersionConstant;
-import org.apache.blur.lucene.codec.Blur022Codec;
+import org.apache.blur.lucene.codec.Blur024Codec;
 import org.apache.blur.mapreduce.lib.BlurMutate.MUTATE_TYPE;
 import org.apache.blur.server.TableContext;
 import org.apache.blur.store.hdfs.HdfsDirectory;
@@ -116,7 +116,7 @@ public class GenericBlurRecordWriter {
     Analyzer analyzer = _fieldManager.getAnalyzerForIndex();
 
     _conf = new IndexWriterConfig(LuceneVersionConstant.LUCENE_VERSION, analyzer);
-    _conf.setCodec(new Blur022Codec());
+    _conf.setCodec(new Blur024Codec(tableContext.getBlurConfiguration()));
     _conf.setSimilarity(tableContext.getSimilarity());
     TieredMergePolicy mergePolicy = (TieredMergePolicy) _conf.getMergePolicy();
     mergePolicy.setUseCompoundFile(false);

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/pom.xml
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/pom.xml b/contrib/blur-hive/pom.xml
index dc02843..3261e9a 100644
--- a/contrib/blur-hive/pom.xml
+++ b/contrib/blur-hive/pom.xml
@@ -15,7 +15,7 @@
 		<groupId>org.apache.blur</groupId>
 		<artifactId>blur</artifactId>
 		<version>0.2.4-incubating-SNAPSHOT</version>
-		<relativePath>../pom.xml</relativePath>
+		<relativePath>../../pom.xml</relativePath>
 	</parent>
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>org.apache.blur</groupId>
@@ -32,6 +32,12 @@
 			<version>0.13.1</version>
 		</dependency>
 		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-cli</artifactId>
+			<version>0.13.1</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
 			<groupId>org.apache.blur</groupId>
 			<artifactId>blur-thrift</artifactId>
 			<version>${project.version}</version>

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputCommitter.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputCommitter.java b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputCommitter.java
new file mode 100644
index 0000000..c46dacd
--- /dev/null
+++ b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputCommitter.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.hive;
+
+import java.io.IOException;
+
+import org.apache.blur.mapreduce.lib.BlurOutputCommitter;
+import org.apache.hadoop.mapred.JobContext;
+import org.apache.hadoop.mapred.OutputCommitter;
+import org.apache.hadoop.mapred.TaskAttemptContext;
+
+public class BlurHiveOutputCommitter extends OutputCommitter {
+
+  private BlurOutputCommitter _committer = new BlurOutputCommitter();
+
+  @Override
+  public void setupJob(JobContext jobContext) throws IOException {
+    _committer.setupJob(jobContext);
+  }
+
+  @Override
+  public void setupTask(TaskAttemptContext taskContext) throws IOException {
+    _committer.setupTask(taskContext);
+  }
+
+  @Override
+  public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
+    return _committer.needsTaskCommit(taskContext);
+  }
+
+  @Override
+  public void commitTask(TaskAttemptContext taskContext) throws IOException {
+    _committer.commitTask(taskContext);
+  }
+
+  @Override
+  public void abortTask(TaskAttemptContext taskContext) throws IOException {
+    _committer.abortTask(taskContext);
+  }
+
+  @Override
+  public void abortJob(JobContext jobContext, int status) throws IOException {
+    _committer.abortJob(jobContext, null);
+  }
+
+  @SuppressWarnings("deprecation")
+  @Override
+  public void cleanupJob(JobContext context) throws IOException {
+    _committer.cleanupJob(context);
+  }
+
+  @Override
+  public void commitJob(JobContext context) throws IOException {
+    _committer.commitJob(context);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputFormat.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputFormat.java b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputFormat.java
index 2a7368a..836d16e 100644
--- a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputFormat.java
+++ b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveOutputFormat.java
@@ -22,6 +22,7 @@ import java.util.Properties;
 import org.apache.blur.mapreduce.lib.BlurMutate;
 import org.apache.blur.mapreduce.lib.BlurMutate.MUTATE_TYPE;
 import org.apache.blur.mapreduce.lib.BlurRecord;
+import org.apache.blur.mapreduce.lib.CheckOutputSpecs;
 import org.apache.blur.mapreduce.lib.GenericBlurRecordWriter;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -37,7 +38,11 @@ public class BlurHiveOutputFormat implements HiveOutputFormat<Text, BlurRecord>
 
   @Override
   public void checkOutputSpecs(FileSystem fileSystem, JobConf jobConf) throws IOException {
-    throw new RuntimeException("Not Implemented");
+    try {
+      CheckOutputSpecs.checkOutputSpecs(jobConf, jobConf.getNumReduceTasks());
+    } catch (InterruptedException e) {
+      throw new IOException(e);
+    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveParitioner.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveParitioner.java b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveParitioner.java
new file mode 100644
index 0000000..1115361
--- /dev/null
+++ b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveParitioner.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.hive;
+
+import org.apache.hadoop.hive.ql.io.HivePartitioner;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+
+public class BlurHiveParitioner implements Partitioner<Writable, Writable>, HivePartitioner<Writable, Writable> {
+
+  @Override
+  public void configure(JobConf job) {
+  }
+
+  @Override
+  public int getPartition(Writable key, Writable value, int numPartitions) {
+    if (value instanceof BytesWritable) {
+      Text rowId = getRowId((BytesWritable) value);
+      return (rowId.hashCode() & Integer.MAX_VALUE) % numPartitions;
+    }
+    throw new RuntimeException("Value of [" + value + "] is not supported.");
+  }
+
+  private Text getRowId(BytesWritable value) {
+    byte[] bs = value.getBytes();
+    int starting = find(bs, (byte) 1, 0);
+    int ending = find(bs, (byte) 1, starting + 1);
+    Text text = new Text();
+    // 00 01 30 31 01
+    // 0 1 2 3 4
+    // starting = 1
+    // ending = 4
+    starting++;
+    text.set(bs, starting, ending - starting);
+    return text;
+  }
+
+  private int find(byte[] bs, byte b, int pos) {
+    for (int i = pos; i < bs.length; i++) {
+      if (bs[i] == b) {
+        return i;
+      }
+    }
+    throw new RuntimeException("Seperator [" + b + "] not found.");
+  }
+
+  @Override
+  public int getBucket(Writable key, Writable value, int numBuckets) {
+    return getPartition(key, value, numBuckets);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveStorageHandler.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveStorageHandler.java b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveStorageHandler.java
index 5053ff9..348438a 100644
--- a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveStorageHandler.java
+++ b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurHiveStorageHandler.java
@@ -16,6 +16,11 @@
  */
 package org.apache.blur.hive;
 
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.blur.mapreduce.lib.BlurOutputFormat;
+import org.apache.blur.thrift.generated.TableDescriptor;
 import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.SerDe;
@@ -44,6 +49,19 @@ public class BlurHiveStorageHandler extends DefaultStorageHandler {
   @Override
   public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
     // Will set setup Table Descriptor and Output Committer.
+    jobConf.setPartitionerClass(BlurHiveParitioner.class);
+    jobConf.setOutputCommitter(BlurHiveOutputCommitter.class);
+    TableDescriptor tableDescriptor;
+    try {
+      tableDescriptor = BlurOutputFormat.getTableDescriptor(jobConf);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    jobConf.setNumReduceTasks(tableDescriptor.getShardCount());
+  }
+
+  @Override
+  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
   }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
index 87ad250..0905cc9 100644
--- a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
+++ b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
@@ -59,8 +59,8 @@ public class BlurObjectInspectorGenerator {
   };
 
   private ObjectInspector _objectInspector;
-  private List<String> _columnNames;
-  private List<TypeInfo> _columnTypes;
+  private List<String> _columnNames = new ArrayList<String>();
+  private List<TypeInfo> _columnTypes = new ArrayList<TypeInfo>();
 
   public BlurObjectInspectorGenerator(Collection<ColumnDefinition> colDefs) throws SerDeException {
     List<ColumnDefinition> colDefList = new ArrayList<ColumnDefinition>(colDefs);

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerDe.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerDe.java b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerDe.java
index cc8e760..ba1b5aa 100644
--- a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerDe.java
+++ b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerDe.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.Properties;
 
 import org.apache.blur.BlurConfiguration;
+import org.apache.blur.mapreduce.lib.BlurOutputFormat;
 import org.apache.blur.mapreduce.lib.BlurRecord;
 import org.apache.blur.thirdparty.thrift_0_9_0.TException;
 import org.apache.blur.thrift.BlurClient;
@@ -30,6 +31,7 @@ import org.apache.blur.thrift.generated.Blur.Iface;
 import org.apache.blur.thrift.generated.BlurException;
 import org.apache.blur.thrift.generated.ColumnDefinition;
 import org.apache.blur.thrift.generated.Schema;
+import org.apache.blur.thrift.generated.TableDescriptor;
 import org.apache.blur.utils.BlurConstants;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
@@ -71,12 +73,17 @@ public class BlurSerDe extends AbstractSerDe {
       if (!tableList.contains(table)) {
         throw new SerDeException("Table [" + table + "] does not exist.");
       }
-      // tableDescriptor = client.describe(table);
+      if (conf != null) {
+        TableDescriptor tableDescriptor = client.describe(table);
+        BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);
+      }
       schema = client.schema(table);
     } catch (BlurException e) {
       throw new SerDeException(e);
     } catch (TException e) {
       throw new SerDeException(e);
+    } catch (IOException e) {
+      throw new SerDeException(e);
     }
 
     Map<String, ColumnDefinition> columns = schema.getFamilies().get(_family);

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
index 0fcf392..b65698f 100644
--- a/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
+++ b/contrib/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
@@ -22,10 +22,32 @@ import java.util.Map;
 import org.apache.blur.mapreduce.lib.BlurRecord;
 import org.apache.blur.thrift.generated.ColumnDefinition;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyBoolean;
+import org.apache.hadoop.hive.serde2.lazy.LazyByte;
+import org.apache.hadoop.hive.serde2.lazy.LazyDate;
+import org.apache.hadoop.hive.serde2.lazy.LazyDouble;
+import org.apache.hadoop.hive.serde2.lazy.LazyFloat;
+import org.apache.hadoop.hive.serde2.lazy.LazyHiveChar;
+import org.apache.hadoop.hive.serde2.lazy.LazyHiveDecimal;
+import org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar;
+import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazyShort;
+import org.apache.hadoop.hive.serde2.lazy.LazyString;
+import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
+import org.apache.hadoop.hive.serde2.lazy.LazyVoid;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
 public class BlurSerializer {
@@ -52,17 +74,21 @@ public class BlurSerializer {
       }
       // ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
       String columnName = columnNames.get(i);
+      String stringValue = toString(structFieldData);
+      if (stringValue == null) {
+        continue;
+      }
       if (columnName.equals(BlurObjectInspectorGenerator.ROWID)) {
-        blurRecord.setRowId((String) structFieldData);
+        blurRecord.setRowId(stringValue);
       } else if (columnName.equals(BlurObjectInspectorGenerator.RECORDID)) {
-        blurRecord.setRecordId((String) structFieldData);
+        blurRecord.setRecordId(stringValue);
       } else {
         if (columnName.equals(BlurObjectInspectorGenerator.GEO_POINTVECTOR)
             || columnName.equals(BlurObjectInspectorGenerator.GEO_RECURSIVEPREFIX)
             || columnName.equals(BlurObjectInspectorGenerator.GEO_TERMPREFIX)) {
           throw new SerDeException("Not supported yet.");
         } else {
-          blurRecord.addColumn(columnName, toString(structFieldData));
+          blurRecord.addColumn(columnName, stringValue);
         }
       }
     }
@@ -70,7 +96,128 @@ public class BlurSerializer {
   }
 
   private String toString(Object o) {
+    if (o == null) {
+      return null;
+    }
+    if (o instanceof LazyBoolean) {
+      return lazyBoolean((LazyBoolean) o);
+    } else if (o instanceof LazyByte) {
+      return lazyByte((LazyByte) o);
+    } else if (o instanceof LazyDate) {
+      return lazyDate((LazyDate) o);
+    } else if (o instanceof LazyDouble) {
+      return lazyDouble((LazyDouble) o);
+    } else if (o instanceof LazyFloat) {
+      return lazyFloat((LazyFloat) o);
+    } else if (o instanceof LazyHiveChar) {
+      return lazyHiveChar((LazyHiveChar) o);
+    } else if (o instanceof LazyHiveDecimal) {
+      return lazyHiveDecimal((LazyHiveDecimal) o);
+    } else if (o instanceof LazyHiveVarchar) {
+      return lazyHiveVarchar((LazyHiveVarchar) o);
+    } else if (o instanceof LazyInteger) {
+      return lazyInteger((LazyInteger) o);
+    } else if (o instanceof LazyLong) {
+      return lazyLong((LazyLong) o);
+    } else if (o instanceof LazyShort) {
+      return lazyShort((LazyShort) o);
+    } else if (o instanceof LazyShort) {
+      return lazyString((LazyString) o);
+    } else if (o instanceof LazyTimestamp) {
+      return lazyTimestamp((LazyTimestamp) o);
+    } else if (o instanceof LazyVoid) {
+      return null;
+    }
     return o.toString();
   }
 
+  private String lazyInteger(LazyInteger o) {
+    IntWritable writableObject = o.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    int i = writableObject.get();
+    return Integer.toString(i);
+  }
+
+  private String lazyLong(LazyLong o) {
+    LongWritable writableObject = o.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    long l = writableObject.get();
+    return Long.toString(l);
+  }
+
+  private String lazyShort(LazyShort o) {
+    ShortWritable writableObject = o.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    short s = writableObject.get();
+    return Short.toString(s);
+  }
+
+  private String lazyString(LazyString o) {
+    Text writableObject = o.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    return writableObject.toString();
+  }
+
+  private String lazyTimestamp(LazyTimestamp o) {
+    throw new RuntimeException("Not implemented.");
+  }
+
+  private String lazyHiveVarchar(LazyHiveVarchar o) {
+    throw new RuntimeException("Not implemented.");
+  }
+
+  private String lazyHiveDecimal(LazyHiveDecimal o) {
+    throw new RuntimeException("Not implemented.");
+  }
+
+  private String lazyHiveChar(LazyHiveChar o) {
+    throw new RuntimeException("Not implemented.");
+  }
+
+  private String lazyFloat(LazyFloat o) {
+    FloatWritable writableObject = o.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    float f = writableObject.get();
+    return Float.toString(f);
+  }
+
+  private String lazyDouble(LazyDouble o) {
+    DoubleWritable writableObject = o.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    double d = writableObject.get();
+    return Double.toString(d);
+  }
+
+  private String lazyDate(LazyDate o) {
+    throw new RuntimeException("Not implemented.");
+  }
+
+  private String lazyByte(LazyByte o) {
+    ByteWritable writableObject = o.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    byte b = writableObject.get();
+    return Integer.toString(b);
+  }
+
+  private String lazyBoolean(LazyBoolean lazyBoolean) {
+    BooleanWritable writableObject = lazyBoolean.getWritableObject();
+    if (writableObject == null) {
+      return null;
+    }
+    return Boolean.toString(writableObject.get());
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/test/java/org/apache/blur/hive/CreateData.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/test/java/org/apache/blur/hive/CreateData.java b/contrib/blur-hive/src/test/java/org/apache/blur/hive/CreateData.java
new file mode 100644
index 0000000..d51841e
--- /dev/null
+++ b/contrib/blur-hive/src/test/java/org/apache/blur/hive/CreateData.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.hive;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class CreateData {
+
+  private static final String SEP = new String(new char[] { 1 });
+
+  public static void main(String[] args) throws IOException {
+    Path path = new Path("hdfs://localhost:9000/user/hive/warehouse/test.db/input_data/data");
+    Configuration configuration = new Configuration();
+    FileSystem fileSystem = path.getFileSystem(configuration);
+    FSDataOutputStream outputStream = fileSystem.create(path);
+    PrintWriter print = new PrintWriter(outputStream);
+    for (int i = 0; i < 10; i++) {
+      String s = Integer.toString(i);
+      print.print(s);
+      print.print(SEP);
+      print.print(s);
+      for (int c = 0; c < 10; c++) {
+        print.print(SEP);
+        print.print(s);
+      }
+      print.println();
+    }
+    print.close();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/test/java/org/apache/blur/hive/RunHiveTest.java
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/test/java/org/apache/blur/hive/RunHiveTest.java b/contrib/blur-hive/src/test/java/org/apache/blur/hive/RunHiveTest.java
new file mode 100644
index 0000000..3451c1c
--- /dev/null
+++ b/contrib/blur-hive/src/test/java/org/apache/blur/hive/RunHiveTest.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.hive;
+
+import org.apache.hadoop.hive.cli.CliDriver;
+
+public class RunHiveTest {
+
+  public static void main(String[] args) throws Exception {
+    CliDriver.main(new String[] { "-f", "./src/test/java/test.hive" });
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/b1d2b57a/contrib/blur-hive/src/test/java/test.hive
----------------------------------------------------------------------
diff --git a/contrib/blur-hive/src/test/java/test.hive b/contrib/blur-hive/src/test/java/test.hive
new file mode 100644
index 0000000..21cca91
--- /dev/null
+++ b/contrib/blur-hive/src/test/java/test.hive
@@ -0,0 +1,74 @@
+set mapred.job.tracker=localhost:9001;
+set hive.metastore.warehouse.dir=hdfs://localhost:9000/user/hive/warehouse;
+
+add jar file:///Users/amccurry/Development/incubator-blur/contrib/blur-hive/target/blur-hive-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-console-0.2.4-incubating-SNAPSHOT-hadoop1-webapp.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-core-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-gui-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-gui-0.2.4-incubating-SNAPSHOT-hadoop1.war;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-mapred-hadoop1-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-query-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-shell-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-store-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-thrift-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/blur-util-0.2.4-incubating-SNAPSHOT-hadoop1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/commons-cli-1.2.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/concurrentlinkedhashmap-lru-1.3.2.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/guava-14.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/jackson-annotations-2.1.1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/jackson-core-2.1.1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/jackson-databind-2.1.1.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/jline-2.10.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/json-20090211.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-analyzers-common-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-codecs-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-core-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-highlighter-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-memory-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-queries-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-queryparser-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-sandbox-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/lucene-spatial-4.3.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/metrics-core-2.2.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/metrics-ganglia-2.2.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/metrics-graphite-2.2.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/metrics-servlet-2.2.0.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/spatial4j-0.3.jar;
+add jar /Users/amccurry/Development/incubator-blur/distribution/target/apache-blur-0.2.4-incubating-SNAPSHOT-hadoop1-bin/lib/zookeeper-3.4.5.jar;
+
+-- create database test;
+use test;
+
+--  CREATE TABLE test
+--  ROW FORMAT SERDE 'org.apache.blur.hive.BlurSerDe'
+--  WITH SERDEPROPERTIES (
+--    'blur.zookeeper.connection'='localhost',
+--    'blur.table'='test_hdfs',
+--    'blur.family'='fam0'
+--  )
+-- STORED BY 'org.apache.blur.hive.BlurHiveStorageHandler';
+
+desc test;
+
+-- create table input_data (
+-- rowid string,
+-- recordid string,
+-- col0 string,
+-- col1 string,
+-- col2 string,
+-- col3 string,
+-- col4 string,
+-- col5 string,
+-- col6 string,
+-- col7 string,
+-- col8 string,
+-- col9 string
+-- );
+
+select * from input_data;
+
+insert overwrite table test select * from input_data distribute by rowid;
+
+
+


Mime
View raw message