jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rve...@apache.org
Subject [1/2] jena git commit: Support RDF Thrift as an input format
Date Tue, 11 Nov 2014 12:59:49 GMT
Repository: jena
Updated Branches:
  refs/heads/hadoop-rdf 3ccab77c7 -> f08fff2a9


http://git-wip-us.apache.org/repos/asf/jena/blob/f08fff2a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java
b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java
index ee9a92c..56dd8ca 100644
--- a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/compressed/AbstractCompressedWholeFileTripleInputFormatTests.java
@@ -16,128 +16,129 @@
  * limitations under the License.
  */
 
-package org.apache.jena.hadoop.rdf.io.input.compressed;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.compress.CompressionCodec;
+package org.apache.jena.hadoop.rdf.io.input.compressed;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.Charset;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.jena.hadoop.rdf.io.HadoopIOConstants;
 import org.apache.jena.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests;
 import org.apache.jena.hadoop.rdf.types.TripleWritable;
-import org.apache.jena.riot.Lang;
-import org.apache.jena.riot.RDFDataMgr;
-
-import com.hp.hpl.jena.graph.Triple;
-import com.hp.hpl.jena.rdf.model.Model;
-import com.hp.hpl.jena.rdf.model.ModelFactory;
-import com.hp.hpl.jena.rdf.model.Property;
-import com.hp.hpl.jena.rdf.model.Resource;
-
-/**
- * Abstract tests for compressed whole file triple formats
- * 
- * 
- */
-public abstract class AbstractCompressedWholeFileTripleInputFormatTests extends
-        AbstractNodeTupleInputFormatTests<Triple, TripleWritable> {
-
-    @Override
-    protected Configuration prepareConfiguration() {
-        Configuration config = super.prepareConfiguration();
-        config.set(HadoopIOConstants.IO_COMPRESSION_CODECS, this.getCompressionCodec().getClass().getCanonicalName());
-        return config;
-    }
-
-    @Override
-    protected Writer getWriter(File f) throws IOException {
-        CompressionCodec codec = this.getCompressionCodec();
-        if (codec instanceof Configurable) {
-            ((Configurable) codec).setConf(this.prepareConfiguration());
-        }
-        FileOutputStream fileOutput = new FileOutputStream(f, false);
-        OutputStream output = codec.createOutputStream(fileOutput);
-        return new OutputStreamWriter(output);
-    }
-
-    /**
-     * Gets the compression codec to use
-     * 
-     * @return Compression codec
-     */
-    protected abstract CompressionCodec getCompressionCodec();
-
-    /**
-     * Indicates whether inputs can be split, defaults to false for compressed
-     * input tests
-     */
-    @Override
-    protected boolean canSplitInputs() {
-        return false;
-    }
-
-    @SuppressWarnings("deprecation")
-    private void writeTuples(Model m, Writer writer) {
-        RDFDataMgr.write(writer, m, this.getRdfLanguage());
-    }
-
-    /**
-     * Gets the RDF language to write out generated tuples in
-     * 
-     * @return RDF language
-     */
-    protected abstract Lang getRdfLanguage();
-
-    @Override
-    protected final void generateTuples(Writer writer, int num) throws IOException {
-        Model m = ModelFactory.createDefaultModel();
-        Resource currSubj = m.createResource("http://example.org/subjects/0");
-        Property predicate = m.createProperty("http://example.org/predicate");
-        for (int i = 0; i < num; i++) {
-            if (i % 10 == 0) {
-                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
-            }
-            m.add(currSubj, predicate, m.createTypedLiteral(i));
-        }
-        this.writeTuples(m, writer);
-        writer.close();
-    }
-
-    @Override
-    protected final void generateMixedTuples(Writer writer, int num) throws IOException {
-        // Write good data
-        Model m = ModelFactory.createDefaultModel();
-        Resource currSubj = m.createResource("http://example.org/subjects/0");
-        Property predicate = m.createProperty("http://example.org/predicate");
-        for (int i = 0; i < num / 2; i++) {
-            if (i % 10 == 0) {
-                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
-            }
-            m.add(currSubj, predicate, m.createTypedLiteral(i));
-        }
-        this.writeTuples(m, writer);
-
-        // Write junk data
-        for (int i = 0; i < num / 2; i++) {
-            writer.write("junk data\n");
-        }
-
-        writer.flush();
-        writer.close();
-    }
-
-    @Override
-    protected final void generateBadTuples(Writer writer, int num) throws IOException {
-        for (int i = 0; i < num; i++) {
-            writer.write("junk data\n");
-        }
-        writer.flush();
-        writer.close();
-    }
-}
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFDataMgr;
+
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Property;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * Abstract tests for compressed whole file triple formats
+ * 
+ * 
+ */
+public abstract class AbstractCompressedWholeFileTripleInputFormatTests extends
+        AbstractNodeTupleInputFormatTests<Triple, TripleWritable> {
+
+    private static final Charset utf8 = Charset.forName("utf-8");
+
+    @Override
+    protected Configuration prepareConfiguration() {
+        Configuration config = super.prepareConfiguration();
+        config.set(HadoopIOConstants.IO_COMPRESSION_CODECS, this.getCompressionCodec().getClass().getCanonicalName());
+        return config;
+    }
+
+    @Override
+    protected OutputStream getOutputStream(File f) throws IOException {
+        CompressionCodec codec = this.getCompressionCodec();
+        if (codec instanceof Configurable) {
+            ((Configurable) codec).setConf(this.prepareConfiguration());
+        }
+        FileOutputStream fileOutput = new FileOutputStream(f, false);
+        return codec.createOutputStream(fileOutput);
+    }
+
+    /**
+     * Gets the compression codec to use
+     * 
+     * @return Compression codec
+     */
+    protected abstract CompressionCodec getCompressionCodec();
+
+    /**
+     * Indicates whether inputs can be split, defaults to false for compressed
+     * input tests
+     */
+    @Override
+    protected boolean canSplitInputs() {
+        return false;
+    }
+
+    private void writeTuples(Model m, OutputStream output) {
+        RDFDataMgr.write(output, m, this.getRdfLanguage());
+    }
+
+    /**
+     * Gets the RDF language to write out generated tuples in
+     * 
+     * @return RDF language
+     */
+    protected abstract Lang getRdfLanguage();
+
+    @Override
+    protected final void generateTuples(OutputStream output, int num) throws IOException
{
+        Model m = ModelFactory.createDefaultModel();
+        Resource currSubj = m.createResource("http://example.org/subjects/0");
+        Property predicate = m.createProperty("http://example.org/predicate");
+        for (int i = 0; i < num; i++) {
+            if (i % 10 == 0) {
+                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
+            }
+            m.add(currSubj, predicate, m.createTypedLiteral(i));
+        }
+        this.writeTuples(m, output);
+        output.close();
+    }
+
+    @Override
+    protected final void generateMixedTuples(OutputStream output, int num) throws IOException
{
+        // Write good data
+        Model m = ModelFactory.createDefaultModel();
+        Resource currSubj = m.createResource("http://example.org/subjects/0");
+        Property predicate = m.createProperty("http://example.org/predicate");
+        for (int i = 0; i < num / 2; i++) {
+            if (i % 10 == 0) {
+                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
+            }
+            m.add(currSubj, predicate, m.createTypedLiteral(i));
+        }
+        this.writeTuples(m, output);
+
+        // Write junk data
+        byte[] junk = "junk data\n".getBytes(utf8);
+        for (int i = 0; i < num / 2; i++) {
+            output.write(junk);
+        }
+
+        output.flush();
+        output.close();
+    }
+
+    @Override
+    protected final void generateBadTuples(OutputStream output, int num) throws IOException
{
+        byte[] junk = "junk data\n".getBytes(utf8);
+        for (int i = 0; i < num; i++) {
+            output.write(junk);
+        }
+        output.flush();
+        output.close();
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/f08fff2a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftQuadInputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftQuadInputTest.java
b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftQuadInputTest.java
new file mode 100644
index 0000000..8d79295
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftQuadInputTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.thrift;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileQuadInputFormatTests;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+
+
+/**
+ * Tests for JSON-LD input
+ * 
+ *
+ */
+public class ThriftQuadInputTest extends AbstractWholeFileQuadInputFormatTests {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return RDFLanguages.THRIFT;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".trdf";
+    }
+
+    @Override
+    protected InputFormat<LongWritable, QuadWritable> getInputFormat() {
+        return new ThriftQuadInputFormat();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/f08fff2a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputTest.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputTest.java
b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputTest.java
new file mode 100644
index 0000000..6b5e0b7
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/test/java/org/apache/jena/hadoop/rdf/io/input/thrift/ThriftTripleInputTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.thrift;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.hadoop.rdf.io.input.AbstractWholeFileTripleInputFormatTests;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+
+
+/**
+ * Tests for JSON-LD input
+ * 
+ *
+ */
+public class ThriftTripleInputTest extends AbstractWholeFileTripleInputFormatTests {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return RDFLanguages.THRIFT;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".trdf";
+    }
+
+    @Override
+    protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
+        return new ThriftTripleInputFormat();
+    }
+
+}


Mime
View raw message