arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1015 [Java] Schema-level metadata
Date Mon, 15 May 2017 20:19:41 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 681afabb4 -> b23b8643a


ARROW-1015 [Java] Schema-level metadata

Author: Emilio Lahr-Vivaz <elahrvivaz@ccri.com>

Closes #676 from elahrvivaz/ARROW-1015 and squashes the following commits:

9e94e1a [Emilio Lahr-Vivaz] Changing accessor to getCustomMetadata
9ffdb5a [Emilio Lahr-Vivaz] ARROW-1015 [Java] Schema-level metadata


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/b23b8643
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/b23b8643
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/b23b8643

Branch: refs/heads/master
Commit: b23b8643aa91bb00610dbe6fa566d11ec2f50402
Parents: 681afab
Author: Emilio Lahr-Vivaz <elahrvivaz@ccri.com>
Authored: Mon May 15 16:19:35 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon May 15 16:19:35 2017 -0400

----------------------------------------------------------------------
 .../apache/arrow/vector/VectorSchemaRoot.java   | 12 +++--
 .../apache/arrow/vector/file/ArrowReader.java   |  7 +--
 .../apache/arrow/vector/file/ArrowWriter.java   |  2 +-
 .../apache/arrow/vector/types/pojo/Schema.java  | 52 ++++++++++++++++++--
 .../apache/arrow/vector/file/TestArrowFile.java | 17 +++++--
 .../apache/arrow/vector/pojo/TestConvert.java   | 21 ++++++--
 6 files changed, 90 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/b23b8643/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
index 29b9673..73deb0b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
@@ -41,11 +41,15 @@ public class VectorSchemaRoot implements AutoCloseable {
   }
 
   public VectorSchemaRoot(List<Field> fields, List<FieldVector> fieldVectors, int rowCount) {
-    if (fields.size() != fieldVectors.size()) {
+    this(new Schema(fields), fieldVectors, rowCount);
+  }
+
+  public VectorSchemaRoot(Schema schema, List<FieldVector> fieldVectors, int rowCount) {
+    if (schema.getFields().size() != fieldVectors.size()) {
       throw new IllegalArgumentException("Fields must match field vectors. Found " +
-          fieldVectors.size() + " vectors and " + fields.size() + " fields");
+          fieldVectors.size() + " vectors and " + schema.getFields().size() + " fields");
     }
-    this.schema = new Schema(fields);
+    this.schema = schema;
     this.rowCount = rowCount;
     this.fieldVectors = fieldVectors;
     for (int i = 0; i < schema.getFields().size(); ++i) {
@@ -65,7 +69,7 @@ public class VectorSchemaRoot implements AutoCloseable {
       throw new IllegalArgumentException("The root vector did not create the right number of children. found " +
         fieldVectors.size() + " expected " + schema.getFields().size());
     }
-    return new VectorSchemaRoot(schema.getFields(), fieldVectors, 0);
+    return new VectorSchemaRoot(schema, fieldVectors, 0);
   }
 
   public List<FieldVector> getFieldVectors() {

http://git-wip-us.apache.org/repos/asf/arrow/blob/b23b8643/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java
index f000885..b331ea7 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowReader.java
@@ -150,18 +150,19 @@ public abstract class ArrowReader<T extends ReadChannel> implements DictionaryPr
    * Reads the schema and initializes the vectors
    */
   private void initialize() throws IOException {
-    Schema schema = readSchema(in);
+    Schema originalSchema = readSchema(in);
     List<Field> fields = new ArrayList<>();
     List<FieldVector> vectors = new ArrayList<>();
     Map<Long, Dictionary> dictionaries = new HashMap<>();
 
-    for (Field field: schema.getFields()) {
+    for (Field field: originalSchema.getFields()) {
       Field updated = toMemoryFormat(field, dictionaries);
       fields.add(updated);
       vectors.add(updated.createVector(allocator));
     }
+    Schema schema = new Schema(fields, originalSchema.getCustomMetadata());
 
-    this.root = new VectorSchemaRoot(fields, vectors, 0);
+    this.root = new VectorSchemaRoot(schema, vectors, 0);
     this.loader = new VectorLoader(root);
     this.dictionaries = Collections.unmodifiableMap(dictionaries);
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/b23b8643/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java
index 35b44fb..4abaed0 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/ArrowWriter.java
@@ -78,7 +78,7 @@ public abstract class ArrowWriter implements AutoCloseable {
       fields.add(toMessageFormat(field, provider, dictionaryBatches));
     }
 
-    this.schema = new Schema(fields);
+    this.schema = new Schema(fields, root.getSchema().getCustomMetadata());
     this.dictionaries = Collections.unmodifiableList(new ArrayList<>(dictionaryBatches.values()));
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/b23b8643/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
index cede3e8..82e2ef5 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
@@ -25,10 +25,15 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Objects;
 
 import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -36,8 +41,11 @@ import com.fasterxml.jackson.databind.ObjectReader;
 import com.fasterxml.jackson.databind.ObjectWriter;
 import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.flatbuffers.FlatBufferBuilder;
 
+import org.apache.arrow.flatbuf.KeyValue;
+
 /**
  * An Arrow Schema
  */
@@ -76,24 +84,43 @@ public class Schema {
       childrenBuilder.add(convertField(schema.fields(i)));
     }
     List<Field> fields = childrenBuilder.build();
-    return new Schema(fields);
+    ImmutableMap.Builder<String, String> metadataBuilder = ImmutableMap.builder();
+    for (int i = 0; i < schema.customMetadataLength(); i++) {
+      KeyValue kv = schema.customMetadata(i);
+      String key = kv.key(), value = kv.value();
+      metadataBuilder.put(key == null ? "" : key, value == null ? "" : value);
+    }
+    Map<String, String> metadata = metadataBuilder.build();
+    return new Schema(fields, metadata);
   }
 
   private final List<Field> fields;
+  private final Map<String, String> metadata;
+
+  public Schema(Iterable<Field> fields) {
+    this(fields, null);
+  }
 
   @JsonCreator
-  public Schema(@JsonProperty("fields") Iterable<Field> fields) {
+  public Schema(@JsonProperty("fields") Iterable<Field> fields,
+                @JsonProperty("metadata") Map<String, String> metadata) {
     List<Field> fieldList = new ArrayList<>();
     for (Field field : fields) {
       fieldList.add(field);
     }
     this.fields = Collections.unmodifiableList(fieldList);
+    this.metadata = metadata == null ? ImmutableMap.<String, String>of() : ImmutableMap.copyOf(metadata);
   }
 
   public List<Field> getFields() {
     return fields;
   }
 
+  @JsonInclude(Include.NON_EMPTY)
+  public Map<String, String> getCustomMetadata() {
+    return metadata;
+  }
+
   /**
    * @param name the name of the field to return
    * @return the corresponding field
@@ -117,15 +144,28 @@ public class Schema {
       fieldOffsets[i] = fields.get(i).getField(builder);
     }
     int fieldsOffset = org.apache.arrow.flatbuf.Schema.createFieldsVector(builder, fieldOffsets);
+    int[] metadataOffsets = new int[metadata.size()];
+    Iterator<Entry<String, String>> metadataIterator = metadata.entrySet().iterator();
+    for (int i = 0; i < metadataOffsets.length; i ++) {
+      Entry<String, String> kv = metadataIterator.next();
+      int keyOffset = builder.createString(kv.getKey());
+      int valueOffset = builder.createString(kv.getValue());
+      KeyValue.startKeyValue(builder);
+      KeyValue.addKey(builder, keyOffset);
+      KeyValue.addValue(builder, valueOffset);
+      metadataOffsets[i] = KeyValue.endKeyValue(builder);
+    }
+    int metadataOffset = org.apache.arrow.flatbuf.Field.createCustomMetadataVector(builder, metadataOffsets);
     org.apache.arrow.flatbuf.Schema.startSchema(builder);
     org.apache.arrow.flatbuf.Schema.addFields(builder, fieldsOffset);
+    org.apache.arrow.flatbuf.Schema.addCustomMetadata(builder, metadataOffset);
     return org.apache.arrow.flatbuf.Schema.endSchema(builder);
   }
 
 
   @Override
   public int hashCode() {
-    return Objects.hashCode(fields);
+    return Objects.hash(fields, metadata);
   }
 
   @Override
@@ -133,11 +173,13 @@ public class Schema {
     if (!(obj instanceof Schema)) {
       return false;
     }
-    return Objects.equals(this.fields, ((Schema) obj).fields);
+    return Objects.equals(this.fields, ((Schema) obj).fields) &&
+           Objects.equals(this.metadata, ((Schema) obj).metadata);
   }
 
   @Override
   public String toString() {
-    return "Schema<" + Joiner.on(", ").join(fields) + ">";
+    String meta = metadata.isEmpty() ? "" : "(metadata: " + metadata.toString() + ")";
+    return "Schema<" + Joiner.on(", ").join(fields) + ">" + meta;
   }
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/b23b8643/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
index 9156110..3353112 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
@@ -29,6 +29,7 @@ import java.io.OutputStream;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -386,7 +387,7 @@ public class TestArrowFile extends BaseFileTest {
   }
 
   @Test
-  public void testWriteReadFieldMetadata() throws IOException {
+  public void testWriteReadMetadata() throws IOException {
     File file = new File("target/mytest_metadata.arrow");
     ByteArrayOutputStream stream = new ByteArrayOutputStream();
 
@@ -397,7 +398,11 @@ public class TestArrowFile extends BaseFileTest {
     childFields.add(new Field("list-child", new FieldType(true, ArrowType.List.INSTANCE, null, metadata(4)),
                               ImmutableList.of(new Field("l1", FieldType.nullable(new ArrowType.Int(16 ,true)), null))));
     Field field = new Field("meta", new FieldType(true, ArrowType.Struct.INSTANCE, null, metadata(0)), childFields);
-    List<Field> fields = ImmutableList.of(field);
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("s1", "v1");
+    metadata.put("s2", "v2");
+    Schema originalSchema = new Schema(ImmutableList.of(field), metadata);
+    Assert.assertEquals(metadata, originalSchema.getCustomMetadata());
 
     // write
     try (BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
@@ -406,7 +411,7 @@ public class TestArrowFile extends BaseFileTest {
       vector.getMutator().setValueCount(0);
 
       List<FieldVector> vectors = ImmutableList.<FieldVector>of(vector);
-      VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0);
+      VectorSchemaRoot root = new VectorSchemaRoot(originalSchema, vectors, 0);
 
       try (FileOutputStream fileOutputStream = new FileOutputStream(file);
            ArrowFileWriter fileWriter = new ArrowFileWriter(root, null, fileOutputStream.getChannel());
@@ -428,7 +433,8 @@ public class TestArrowFile extends BaseFileTest {
       VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
       Schema schema = root.getSchema();
       LOGGER.debug("reading schema: " + schema);
-      Assert.assertEquals(fields, schema.getFields());
+      Assert.assertEquals(originalSchema, schema);
+      Assert.assertEquals(originalSchema.getCustomMetadata(), schema.getCustomMetadata());
       Field top = schema.getFields().get(0);
       Assert.assertEquals(metadata(0), top.getMetadata());
       for (int i = 0; i < 4; i ++) {
@@ -443,7 +449,8 @@ public class TestArrowFile extends BaseFileTest {
       VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
       Schema schema = root.getSchema();
       LOGGER.debug("reading schema: " + schema);
-      Assert.assertEquals(fields, schema.getFields());
+      Assert.assertEquals(originalSchema, schema);
+      Assert.assertEquals(originalSchema.getCustomMetadata(), schema.getCustomMetadata());
       Field top = schema.getFields().get(0);
       Assert.assertEquals(metadata(0), top.getMetadata());
       for (int i = 0; i < 4; i ++) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/b23b8643/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
index e2dae29..62c21f7 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
@@ -21,6 +21,12 @@ import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
 import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
 import static org.junit.Assert.assertEquals;
 
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableList;
+import com.google.flatbuffers.FlatBufferBuilder;
+
 import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.UnionMode;
@@ -36,9 +42,6 @@ import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.junit.Test;
 
-import com.google.common.collect.ImmutableList;
-import com.google.flatbuffers.FlatBufferBuilder;
-
 /**
  * Test conversion between Flatbuf and Pojo field representations
  */
@@ -70,6 +73,18 @@ public class TestConvert {
   }
 
   @Test
+  public void schemaMetadata() {
+    ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
+    childrenBuilder.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+    childrenBuilder.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), ImmutableList.<Field>of()));
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("key1", "value1");
+    metadata.put("key2", "value2");
+    Schema initialSchema = new Schema(childrenBuilder.build(), metadata);
+    run(initialSchema);
+  }
+
+  @Test
   public void nestedSchema() {
     ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
     childrenBuilder.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));


Mime
View raw message