arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-874: [JS] Read dictionary-encoded vectors
Date Tue, 09 May 2017 19:55:32 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 670612e6f -> 22c738cc7


ARROW-874: [JS] Read dictionary-encoded vectors

Author: Brian Hulette <brian.hulette@ccri.com>
Author: Emilio Lahr-Vivaz <elahrvivaz@ccri.com>
Author: Brian Hulette <hulettbh@gmail.com>

Closes #655 from TheNeuralBit/js_dictionary and squashes the following commits:

4fbaf9d [Brian Hulette] add unit tests, fix errors in file format dictionary reading
d89c84b [Brian Hulette] Add dictionary support for file format
5bdf8a1 [Emilio Lahr-Vivaz] dictionary encoding
c6eff38 [Brian Hulette] Updated API documentation
4c84362 [Brian Hulette] added struct_example tests (Struct type, and multiple record batches)
5a2efe6 [Brian Hulette] add tests for streaming format
9aed94b [Brian Hulette] Fix file format, unit tests
e2e0d4d [Emilio Lahr-Vivaz] renaming reader method
844b5e9 [Emilio Lahr-Vivaz] fix for file format
bfb7754 [Emilio Lahr-Vivaz] cleanup
162c9be [Emilio Lahr-Vivaz] working for streaming
290497f [Emilio Lahr-Vivaz] js support multiple record batches
4b3b412 [Emilio Lahr-Vivaz] initial support for streaming file format, added FixedSizeList
c9d705d [Brian Hulette] Created npm build script
53db587 [Brian Hulette] Fixes to make tests pass
304c669 [Brian Hulette] Added basic js unit tests


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/22c738cc
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/22c738cc
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/22c738cc

Branch: refs/heads/master
Commit: 22c738cc7b26bff9e7319d438dd3fef1238d46ad
Parents: 670612e
Author: Brian Hulette <brian.hulette@ccri.com>
Authored: Tue May 9 15:55:27 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Tue May 9 15:55:27 2017 -0400

----------------------------------------------------------------------
 js/.gitignore                       |   2 +
 js/README.md                        |  17 +-
 js/bin/arrow2csv.js                 |   9 +-
 js/bin/arrow_schema.js              |   8 +-
 js/examples/read_file.html          |   6 +-
 js/flatbuffers.sh                   |  19 +
 js/lib/arrow.ts                     | 440 +++++++++++++++++++----
 js/lib/bitarray.ts                  |  17 +-
 js/lib/types.ts                     | 581 ++++++++++++++++++++++---------
 js/package.json                     |  12 +-
 js/postinstall.sh                   |  18 -
 js/spec/arrow.js                    | 179 ++++++++++
 js/spec/dictionary-stream.arrow     | Bin 0 -> 1776 bytes
 js/spec/dictionary.arrow            | Bin 0 -> 2522 bytes
 js/spec/simple-stream.arrow         | Bin 0 -> 1188 bytes
 js/spec/simple.arrow                | Bin 0 -> 1642 bytes
 js/spec/struct_example-stream.arrow | Bin 0 -> 1884 bytes
 js/spec/struct_example.arrow        | Bin 0 -> 2354 bytes
 18 files changed, 1026 insertions(+), 282 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/.gitignore
----------------------------------------------------------------------
diff --git a/js/.gitignore b/js/.gitignore
index 3b97e3a..f67c1cc 100644
--- a/js/.gitignore
+++ b/js/.gitignore
@@ -2,3 +2,5 @@ lib/*_generated.js
 dist
 node_modules
 typings
+.idea
+*.iml

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/README.md
----------------------------------------------------------------------
diff --git a/js/README.md b/js/README.md
index 98ef756..cdabf54 100644
--- a/js/README.md
+++ b/js/README.md
@@ -20,6 +20,7 @@ From this directory, run:
 $ npm install   # pull dependencies
 $ tsc           # build typescript
 $ webpack       # bundle for the browser
+$ npm test      # run unit tests
 ```
 
 ### Usage
@@ -42,9 +43,19 @@ Include `dist/arrow-bundle.js` in a `<script />` tag:
 See [examples/read_file.html](examples/read_file.html) for a usage example - or try it out now at [theneuralbit.github.io/arrow](http://theneuralbit.github.io/arrow)
 
 ### API
-##### `arrow.loadSchema(buffer)`
+##### `arrow.getReader(buffer)`
+Returns an `ArrowReader` object representing the Arrow file or stream contained in
+the `buffer`.
+
+##### `ArrowReader.loadNextBatch()`
+Loads the next record batch and returns its length.
+
+##### `ArrowReader.getSchema()`
 Returns a JSON representation of the file's Arrow schema.
 
-##### `arrow.loadVectors(buffer)`
-Returns a dictionary of `Vector` objects, one for each column, indexed by the column's name.
+##### `ArrowReader.getVectors()`
+Returns a list of `Vector` objects, one for each column.
 Vector objects have, at minimum, a `get(i)` method and a `length` attribute.
+
+##### `ArrowReader.getVector(name: String)`
+Returns the `Vector` object for the column named `name`.

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/bin/arrow2csv.js
----------------------------------------------------------------------
diff --git a/js/bin/arrow2csv.js b/js/bin/arrow2csv.js
index 48df2f9..8122e95 100755
--- a/js/bin/arrow2csv.js
+++ b/js/bin/arrow2csv.js
@@ -19,7 +19,7 @@
 
 var fs = require('fs')
 var process = require('process');
-var loadVectors = require('../dist/arrow.js').loadVectors;
+var arrow = require('../dist/arrow.js');
 var program = require('commander');
 
 function list (val) {
@@ -38,10 +38,11 @@ if (!program.schema) {
 }
 
 var buf = fs.readFileSync(process.argv[process.argv.length - 1]);
-var vectors = loadVectors(buf);
+var reader = arrow.getReader(buf);
+reader.loadNextBatch();
 
-for (var i = 0; i < vectors[program.schema[0]].length; i += 1|0) {
+for (var i = 0; i < reader.getVector(program.schema[0]).length; i += 1|0) {
     console.log(program.schema.map(function (field) {
-        return '' + vectors[field].get(i);
+        return '' + reader.getVector(field).get(i);
     }).join(','));
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/bin/arrow_schema.js
----------------------------------------------------------------------
diff --git a/js/bin/arrow_schema.js b/js/bin/arrow_schema.js
index 7044778..44dabb4 100755
--- a/js/bin/arrow_schema.js
+++ b/js/bin/arrow_schema.js
@@ -19,7 +19,11 @@
 
 var fs = require('fs');
 var process = require('process');
-var loadSchema = require('../dist/arrow.js').loadSchema;
+var arrow = require('../dist/arrow.js');
 
 var buf = fs.readFileSync(process.argv[process.argv.length - 1]);
-console.log(JSON.stringify(loadSchema(buf), null, '\t'));
+var reader = arrow.getReader(buf);
+console.log(JSON.stringify(reader.getSchema(), null, '\t'));
+//console.log(JSON.stringify(reader.getVectors(), null, '\t'));
+console.log('block count: ' + reader.getBatchCount());
+

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/examples/read_file.html
----------------------------------------------------------------------
diff --git a/js/examples/read_file.html b/js/examples/read_file.html
index 02b6f08..933b142 100644
--- a/js/examples/read_file.html
+++ b/js/examples/read_file.html
@@ -40,9 +40,11 @@ function addCell (tr, type, name) {
 }
 reader.onload = function (evt) {
   var buf = new Uint8Array(evt.target.result);
-  var schema = arrow.loadSchema(buf);
-  var vectors = arrow.loadVectors(buf);
+  var schema = arrow.loadSchemaFromStream(buf);
+  var vectors = arrow.loadVectorsFromStream(buf);
   var length = vectors[schema[0].name].length;
+console.log(JSON.stringify(schema, null, '\t'));
+console.log(JSON.stringify(vectors, null, '\t'));
 
   var thead = document.getElementById("thead");
   var tbody = document.getElementById("tbody");

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/flatbuffers.sh
----------------------------------------------------------------------
diff --git a/js/flatbuffers.sh b/js/flatbuffers.sh
new file mode 100755
index 0000000..99d2815
--- /dev/null
+++ b/js/flatbuffers.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+
+echo "Compiling flatbuffer schemas..."
+#flatc -o lib --js ../format/Message.fbs ../format/File.fbs
+flatc -o lib --js ../format/*.fbs
+rm -f lib/Arrow_generated.js
+cat lib/*_generated.js > lib/Arrow_generated.js

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/lib/arrow.ts
----------------------------------------------------------------------
diff --git a/js/lib/arrow.ts b/js/lib/arrow.ts
index 0762885..74def4d 100644
--- a/js/lib/arrow.ts
+++ b/js/lib/arrow.ts
@@ -17,48 +17,186 @@
 
 import { flatbuffers } from 'flatbuffers';
 import { org } from './Arrow_generated';
-var arrow = org.apache.arrow;
 import { vectorFromField, Vector } from './types';
 
-export function loadVectors(buf) {
-    var fileLength = buf.length, bb, footerLengthOffset, footerLength,
-        footerOffset, footer, schema, field, type, type_str, i,
-        len, rb_metas, rb_meta, rtrn, recordBatchBlock, recordBatchBlocks = [];
-    var vectors : Vector[] = [];
+import ByteBuffer = flatbuffers.ByteBuffer;
+var Footer = org.apache.arrow.flatbuf.Footer;
+var Message = org.apache.arrow.flatbuf.Message;
+var MessageHeader = org.apache.arrow.flatbuf.MessageHeader;
+var RecordBatch = org.apache.arrow.flatbuf.RecordBatch;
+var DictionaryBatch = org.apache.arrow.flatbuf.DictionaryBatch;
+var Schema = org.apache.arrow.flatbuf.Schema;
+var Type = org.apache.arrow.flatbuf.Type;
+var VectorType = org.apache.arrow.flatbuf.VectorType;
+
+export class ArrowReader {
+
+    private bb;
+    private schema: any = [];
+    private vectors: Vector[];
+    private vectorMap: any = {};
+    private dictionaries: any = {};
+    private batches: any = [];
+    private batchIndex: number = 0;
+
+    constructor(bb, schema, vectors: Vector[], batches, dictionaries) {
+        this.bb = bb;
+        this.schema = schema;
+        this.vectors = vectors;
+        for (var i = 0; i < vectors.length; i += 1|0) {
+            this.vectorMap[vectors[i].name] = vectors[i]
+        }
+        this.batches = batches;
+        this.dictionaries = dictionaries;
+    }
+
+    loadNextBatch() {
+        if (this.batchIndex < this.batches.length) {
+            var batch = this.batches[this.batchIndex];
+            this.batchIndex += 1;
+            loadVectors(this.bb, this.vectors, batch);
+            return batch.length;
+        } else {
+            return 0;
+        }
+    }
+
+    getSchema() {
+        return this.schema;
+    }
+
+    getVectors() {
+        return this.vectors;
+    }
+
+    getVector(name) {
+        return this.vectorMap[name];
+    }
 
-    bb = new flatbuffers.ByteBuffer(buf);
+    getBatchCount() {
+        return this.batches.length;
+    }
+
+    // the index of the next batch to be loaded
+    getBatchIndex() {
+        return this.batchIndex;
+    }
 
-    footer = _loadFooter(bb);
+    // set the index of the next batch to be loaded
+    setBatchIndex(i: number) {
+        this.batchIndex = i;
+    }
+}
 
-    schema = footer.schema();
+export function getSchema(buf) { return getReader(buf).getSchema(); }
+
+export function getReader(buf) : ArrowReader {
+    if (_checkMagic(buf, 0)) {
+        return getFileReader(buf);
+    } else {
+        return getStreamReader(buf);
+    }
+}
+
+export function getStreamReader(buf) : ArrowReader {
+    var bb = new ByteBuffer(buf);
+
+    var schema = _loadSchema(bb),
+        field,
+        vectors: Vector[] = [],
+        i,j,
+        iLen,jLen,
+        batch,
+        recordBatches = [],
+        dictionaryBatches = [],
+        dictionaries = {};
+
+    for (i = 0, iLen = schema.fieldsLength(); i < iLen; i += 1|0) {
+        field = schema.fields(i);
+        _createDictionaryVectors(field, dictionaries);
+        vectors.push(vectorFromField(field, dictionaries));
+    }
+
+    while (bb.position() < bb.capacity()) {
+      batch = _loadBatch(bb);
+      if (batch == null) {
+          break;
+      } else if (batch.type == MessageHeader.DictionaryBatch) {
+          dictionaryBatches.push(batch);
+      } else if (batch.type == MessageHeader.RecordBatch) {
+          recordBatches.push(batch)
+      } else {
+          console.error("Expected batch type" + MessageHeader.RecordBatch + " or " +
+              MessageHeader.DictionaryBatch + " but got " + batch.type);
+      }
+    }
+
+    // load dictionary vectors
+    for (i = 0; i < dictionaryBatches.length; i += 1|0) {
+      batch = dictionaryBatches[i];
+      loadVectors(bb, [dictionaries[batch.id]], batch);
+    }
+
+    return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries);
+}
+
+export function getFileReader (buf) : ArrowReader {
+    var bb = new ByteBuffer(buf);
+
+    var footer = _loadFooter(bb);
+
+    var schema = footer.schema();
+    var i, len, field,
+        vectors: Vector[] = [],
+        block,
+        batch,
+        recordBatchBlocks = [],
+        dictionaryBatchBlocks = [],
+        dictionaries = {};
 
     for (i = 0, len = schema.fieldsLength(); i < len; i += 1|0) {
         field = schema.fields(i);
-        vectors.push(vectorFromField(field));
+        _createDictionaryVectors(field, dictionaries);
+        vectors.push(vectorFromField(field, dictionaries));
+    }
+
+    for (i = 0; i < footer.dictionariesLength(); i += 1|0) {
+        block = footer.dictionaries(i);
+        dictionaryBatchBlocks.push({
+            offset: block.offset().low,
+            metaDataLength: block.metaDataLength(),
+            bodyLength: block.bodyLength().low,
+        })
     }
 
     for (i = 0; i < footer.recordBatchesLength(); i += 1|0) {
-        recordBatchBlock = footer.recordBatches(i);
+        block = footer.recordBatches(i);
         recordBatchBlocks.push({
-            offset: recordBatchBlock.offset(),
-            metaDataLength: recordBatchBlock.metaDataLength(),
-            bodyLength: recordBatchBlock.bodyLength(),
+            offset: block.offset().low,
+            metaDataLength: block.metaDataLength(),
+            bodyLength: block.bodyLength().low,
         })
     }
 
-    loadBuffersIntoVectors(recordBatchBlocks, bb, vectors);
-    var rtrn : any = {};
-    for (var i : any = 0; i < vectors.length; i += 1|0) {
-      rtrn[vectors[i].name] = vectors[i]
+    var dictionaryBatches = dictionaryBatchBlocks.map(function (block) {
+        bb.setPosition(block.offset);
+        // TODO: Make sure this is a dictionary batch
+        return _loadBatch(bb);
+    });
+
+    var recordBatches = recordBatchBlocks.map(function (block) {
+        bb.setPosition(block.offset);
+        // TODO: Make sure this is a record batch
+        return _loadBatch(bb);
+    });
+
+    // load dictionary vectors
+    for (i = 0; i < dictionaryBatches.length; i += 1|0) {
+        batch = dictionaryBatches[i];
+        loadVectors(bb, [dictionaries[batch.id]], batch);
     }
-    return rtrn;
-}
 
-export function loadSchema(buf) {
-    var footer = _loadFooter(new flatbuffers.ByteBuffer(buf));
-    var schema = footer.schema();
-
-    return parseSchema(schema);
+    return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries);
 }
 
 function _loadFooter(bb) {
@@ -81,7 +219,7 @@ function _loadFooter(bb) {
 
     var footerLengthOffset: number = fileLength - MAGIC.length - 4;
     bb.setPosition(footerLengthOffset);
-    var footerLength: number = Int64FromByteBuffer(bb, footerLengthOffset)
+    var footerLength: number = Int32FromByteBuffer(bb, footerLengthOffset)
 
     if (footerLength <= 0 || footerLength + MAGIC.length*2 + 4 > fileLength)  {
       console.log("Invalid footer length: " + footerLength)
@@ -89,19 +227,166 @@ function _loadFooter(bb) {
 
     var footerOffset: number = footerLengthOffset - footerLength;
     bb.setPosition(footerOffset);
-    var footer = arrow.flatbuf.Footer.getRootAsFooter(bb);
+    var footer = Footer.getRootAsFooter(bb);
 
     return footer;
 }
 
-function Int64FromByteBuffer(bb, offset) {
+function _loadSchema(bb) {
+    var message =_loadMessage(bb);
+    if (message.headerType() != MessageHeader.Schema) {
+        console.error("Expected header type " + MessageHeader.Schema + " but got " + message.headerType());
+        return;
+    }
+    return message.header(new Schema());
+}
+
+function _loadBatch(bb) {
+    var message = _loadMessage(bb);
+    if (message == null) {
+        return;
+    } else if (message.headerType() == MessageHeader.RecordBatch) {
+        var batch = { header: message.header(new RecordBatch()), length: message.bodyLength().low }
+        return _loadRecordBatch(bb, batch);
+    } else if (message.headerType() == MessageHeader.DictionaryBatch) {
+        var batch = { header: message.header(new DictionaryBatch()), length: message.bodyLength().low }
+        return _loadDictionaryBatch(bb, batch);
+    } else {
+        console.error("Expected header type " + MessageHeader.RecordBatch + " or " + MessageHeader.DictionaryBatch +
+            " but got " + message.headerType());
+        return;
+    }
+}
+
+function _loadRecordBatch(bb, batch) {
+    var data = batch.header;
+    var i, nodes_ = [], nodesLength = data.nodesLength();
+    var buffer, buffers_ = [], buffersLength = data.buffersLength();
+
+    for (i = 0; i < nodesLength; i += 1) {
+        nodes_.push(data.nodes(i));
+    }
+    for (i = 0; i < buffersLength; i += 1) {
+        buffer = data.buffers(i);
+        buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low });
+    }
+    // position the buffer after the body to read the next message
+    bb.setPosition(bb.position() + batch.length);
+
+    return { nodes: nodes_, buffers: buffers_, length: data.length().low, type: MessageHeader.RecordBatch };
+}
+
+function _loadDictionaryBatch(bb, batch) {
+    var id_ = batch.header.id().toFloat64().toString(), data = batch.header.data();
+    var i, nodes_ = [], nodesLength = data.nodesLength();
+    var buffer, buffers_ = [], buffersLength = data.buffersLength();
+
+    for (i = 0; i < nodesLength; i += 1) {
+        nodes_.push(data.nodes(i));
+    }
+    for (i = 0; i < buffersLength; i += 1) {
+        buffer = data.buffers(i);
+        buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low });
+    }
+    // position the buffer after the body to read the next message
+    bb.setPosition(bb.position() + batch.length);
+
+    return { id: id_, nodes: nodes_, buffers: buffers_, length: data.length().low, type: MessageHeader.DictionaryBatch };
+}
+
+function _loadMessage(bb) {
+    var messageLength: number = Int32FromByteBuffer(bb, bb.position());
+    if (messageLength == 0) {
+      return;
+    }
+    bb.setPosition(bb.position() + 4);
+    var message = Message.getRootAsMessage(bb);
+    // position the buffer at the end of the message so it's ready to read further
+    bb.setPosition(bb.position() + messageLength);
+
+    return message;
+}
+
+function _createDictionaryVectors(field, dictionaries) {
+    var encoding = field.dictionary();
+    if (encoding != null) {
+        var id = encoding.id().toFloat64().toString();
+        if (dictionaries[id] == null) {
+            // create a field for the dictionary
+            var dictionaryField = _createDictionaryField(id, field);
+            dictionaries[id] = vectorFromField(dictionaryField, null);
+        }
+    }
+
+    // recursively examine child fields
+    for (var i = 0, len = field.childrenLength(); i < len; i += 1|0) {
+        _createDictionaryVectors(field.children(i), dictionaries);
+    }
+}
+
+function _createDictionaryField(id, field) {
+    var builder = new flatbuffers.Builder();
+    var nameOffset = builder.createString("dict-" + id);
+
+    var typeType = field.typeType();
+    var typeOffset;
+    if (typeType === Type.Int) {
+        var type = field.type(new org.apache.arrow.flatbuf.Int());
+        org.apache.arrow.flatbuf.Int.startInt(builder);
+        org.apache.arrow.flatbuf.Int.addBitWidth(builder, type.bitWidth());
+        org.apache.arrow.flatbuf.Int.addIsSigned(builder, type.isSigned());
+        typeOffset = org.apache.arrow.flatbuf.Int.endInt(builder);
+    } else if (typeType === Type.FloatingPoint) {
+        var type = field.type(new org.apache.arrow.flatbuf.FloatingPoint());
+        org.apache.arrow.flatbuf.FloatingPoint.startFloatingPoint(builder);
+        org.apache.arrow.flatbuf.FloatingPoint.addPrecision(builder, type.precision());
+        typeOffset = org.apache.arrow.flatbuf.FloatingPoint.endFloatingPoint(builder);
+    } else if (typeType === Type.Utf8) {
+        org.apache.arrow.flatbuf.Utf8.startUtf8(builder);
+        typeOffset = org.apache.arrow.flatbuf.Utf8.endUtf8(builder);
+    } else if (typeType === Type.Date) {
+        var type = field.type(new org.apache.arrow.flatbuf.Date());
+        org.apache.arrow.flatbuf.Date.startDate(builder);
+        org.apache.arrow.flatbuf.Date.addUnit(builder, type.unit());
+        typeOffset = org.apache.arrow.flatbuf.Date.endDate(builder);
+    } else {
+        throw "Unimplemented dictionary type " + typeType;
+    }
+    if (field.childrenLength() > 0) {
+      throw "Dictionary encoded fields can't have children"
+    }
+    var childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, []);
+
+    var layout, layoutOffsets = [];
+    for (var i = 0, len = field.layoutLength(); i < len; i += 1|0) {
+        layout = field.layout(i);
+        org.apache.arrow.flatbuf.VectorLayout.startVectorLayout(builder);
+        org.apache.arrow.flatbuf.VectorLayout.addBitWidth(builder, layout.bitWidth());
+        org.apache.arrow.flatbuf.VectorLayout.addType(builder, layout.type());
+        layoutOffsets.push(org.apache.arrow.flatbuf.VectorLayout.endVectorLayout(builder));
+    }
+    var layoutOffset = org.apache.arrow.flatbuf.Field.createLayoutVector(builder, layoutOffsets);
+
+    org.apache.arrow.flatbuf.Field.startField(builder);
+    org.apache.arrow.flatbuf.Field.addName(builder, nameOffset);
+    org.apache.arrow.flatbuf.Field.addNullable(builder, field.nullable());
+    org.apache.arrow.flatbuf.Field.addTypeType(builder, typeType);
+    org.apache.arrow.flatbuf.Field.addType(builder, typeOffset);
+    org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset);
+    org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset);
+    var offset = org.apache.arrow.flatbuf.Field.endField(builder);
+    builder.finish(offset);
+
+    return org.apache.arrow.flatbuf.Field.getRootAsField(builder.bb);
+}
+
+function Int32FromByteBuffer(bb, offset) {
     return ((bb.bytes_[offset + 3] & 255) << 24) |
            ((bb.bytes_[offset + 2] & 255) << 16) |
            ((bb.bytes_[offset + 1] & 255) << 8) |
            ((bb.bytes_[offset] & 255));
 }
 
-
 var MAGIC_STR = "ARROW1";
 var MAGIC = new Uint8Array(MAGIC_STR.length);
 for (var i = 0; i < MAGIC_STR.length; i += 1|0) {
@@ -118,27 +403,28 @@ function _checkMagic(buf, index) {
 }
 
 var TYPEMAP = {}
-TYPEMAP[arrow.flatbuf.Type.NONE]          = "NONE";
-TYPEMAP[arrow.flatbuf.Type.Null]          = "Null";
-TYPEMAP[arrow.flatbuf.Type.Int]           = "Int";
-TYPEMAP[arrow.flatbuf.Type.FloatingPoint] = "FloatingPoint";
-TYPEMAP[arrow.flatbuf.Type.Binary]        = "Binary";
-TYPEMAP[arrow.flatbuf.Type.Utf8]          = "Utf8";
-TYPEMAP[arrow.flatbuf.Type.Bool]          = "Bool";
-TYPEMAP[arrow.flatbuf.Type.Decimal]       = "Decimal";
-TYPEMAP[arrow.flatbuf.Type.Date]          = "Date";
-TYPEMAP[arrow.flatbuf.Type.Time]          = "Time";
-TYPEMAP[arrow.flatbuf.Type.Timestamp]     = "Timestamp";
-TYPEMAP[arrow.flatbuf.Type.Interval]      = "Interval";
-TYPEMAP[arrow.flatbuf.Type.List]          = "List";
-TYPEMAP[arrow.flatbuf.Type.Struct_]       = "Struct";
-TYPEMAP[arrow.flatbuf.Type.Union]         = "Union";
+TYPEMAP[Type.NONE]          = "NONE";
+TYPEMAP[Type.Null]          = "Null";
+TYPEMAP[Type.Int]           = "Int";
+TYPEMAP[Type.FloatingPoint] = "FloatingPoint";
+TYPEMAP[Type.Binary]        = "Binary";
+TYPEMAP[Type.Utf8]          = "Utf8";
+TYPEMAP[Type.Bool]          = "Bool";
+TYPEMAP[Type.Decimal]       = "Decimal";
+TYPEMAP[Type.Date]          = "Date";
+TYPEMAP[Type.Time]          = "Time";
+TYPEMAP[Type.Timestamp]     = "Timestamp";
+TYPEMAP[Type.Interval]      = "Interval";
+TYPEMAP[Type.List]          = "List";
+TYPEMAP[Type.FixedSizeList] = "FixedSizeList";
+TYPEMAP[Type.Struct_]       = "Struct";
+TYPEMAP[Type.Union]         = "Union";
 
 var VECTORTYPEMAP = {};
-VECTORTYPEMAP[arrow.flatbuf.VectorType.OFFSET]   = 'OFFSET';
-VECTORTYPEMAP[arrow.flatbuf.VectorType.DATA]     = 'DATA';
-VECTORTYPEMAP[arrow.flatbuf.VectorType.VALIDITY] = 'VALIDITY';
-VECTORTYPEMAP[arrow.flatbuf.VectorType.TYPE]     = 'TYPE';
+VECTORTYPEMAP[VectorType.OFFSET]   = 'OFFSET';
+VECTORTYPEMAP[VectorType.DATA]     = 'DATA';
+VECTORTYPEMAP[VectorType.VALIDITY] = 'VALIDITY';
+VECTORTYPEMAP[VectorType.TYPE]     = 'TYPE';
 
 function parseField(field) {
     var children = [];
@@ -149,7 +435,6 @@ function parseField(field) {
     var layouts = [];
     for (var i = 0; i < field.layoutLength(); i += 1|0) {
         layouts.push(VECTORTYPEMAP[field.layout(i).type()]);
-
     }
 
     return {
@@ -170,32 +455,39 @@ function parseSchema(schema) {
     return result;
 }
 
-function parseBuffer(buffer) {
-    return {
-        offset: buffer.offset(),
-        length: buffer.length()
-    };
+function loadVectors(bb, vectors: Vector[], recordBatch) {
+    var indices = { bufferIndex: 0, nodeIndex: 0 }, i;
+    for (i = 0; i < vectors.length; i += 1) {
+        loadVector(bb, vectors[i], recordBatch, indices);
+    }
 }
 
-function loadBuffersIntoVectors(recordBatchBlocks, bb, vectors : Vector[]) {
-    var fieldNode, recordBatchBlock, recordBatch, numBuffers, bufReader = {index: 0, node_index: 1}, field_ctr = 0;
-    var buffer = bb.bytes_.buffer;
-    var baseOffset = bb.bytes_.byteOffset;
-    for (var i = recordBatchBlocks.length - 1; i >= 0; i -= 1|0) {
-        recordBatchBlock = recordBatchBlocks[i];
-        bb.setPosition(recordBatchBlock.offset.low);
-        recordBatch = arrow.flatbuf.RecordBatch.getRootAsRecordBatch(bb);
-        bufReader.index = 0;
-        bufReader.node_index = 0;
-        numBuffers = recordBatch.buffersLength();
-
-        //console.log('num buffers: ' + recordBatch.buffersLength());
-        //console.log('num nodes: ' + recordBatch.nodesLength());
-
-        while (bufReader.index < numBuffers) {
-            //console.log('Allocating buffers starting at ' + bufReader.index + '/' + numBuffers + ' to field ' + field_ctr);
-            vectors[field_ctr].loadData(recordBatch, buffer, bufReader, baseOffset + recordBatchBlock.offset.low + recordBatchBlock.metaDataLength)
-            field_ctr += 1;
-        }
+/**
+ * Loads a vector with data from a batch
+ *   recordBatch: { nodes: org.apache.arrow.flatbuf.FieldNode[], buffers: { offset: number, length: number }[] }
+ */
+function loadVector(bb, vector: Vector, recordBatch, indices) {
+    var node = recordBatch.nodes[indices.nodeIndex], ownBuffersLength, ownBuffers = [], i;
+    indices.nodeIndex += 1;
+
+    // dictionary vectors are always ints, so will have a data vector plus optional null vector
+    if (vector.field.dictionary() == null) {
+        ownBuffersLength = vector.field.layoutLength();
+    } else if (vector.field.nullable()) {
+        ownBuffersLength = 2;
+    } else {
+        ownBuffersLength = 1;
+    }
+
+    for (i = 0; i < ownBuffersLength; i += 1) {
+        ownBuffers.push(recordBatch.buffers[indices.bufferIndex + i]);
+    }
+    indices.bufferIndex += ownBuffersLength;
+
+    vector.loadData(bb, node, ownBuffers);
+
+    var children = vector.getChildVectors();
+    for (i = 0; i < children.length; i++) {
+        loadVector(bb, children[i], recordBatch, indices);
     }
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/lib/bitarray.ts
----------------------------------------------------------------------
diff --git a/js/lib/bitarray.ts b/js/lib/bitarray.ts
index 82fff32..fc3c091 100644
--- a/js/lib/bitarray.ts
+++ b/js/lib/bitarray.ts
@@ -17,22 +17,9 @@
 
 export class BitArray {
     private view: Uint8Array;
-    constructor(buffer: ArrayBuffer, offset: number, length: number) {
-        //if (ArrayBuffer.isView(buffer)) {
-        //    var og_view = buffer;
-        //    buffer = buffer.buffer;
-        //    offset = og_view.offset;
-        //    length = og_view.length/og_view.BYTES_PER_ELEMENT*8;
-        //} else if (buffer instanceof ArrayBuffer) {
-        var offset = offset || 0;
-        var length = length;// || buffer.length*8;
-        //} else if (buffer instanceof Number) {
-        //    length = buffer;
-        //    buffer = new ArrayBuffer(Math.ceil(length/8));
-        //    offset = 0;
-        //}
 
-        this.view = new Uint8Array(buffer, offset, Math.ceil(length/8));
+    constructor(buffer: ArrayBuffer, offset: number, length: number) {
+        this.view = new Uint8Array(buffer, offset || 0, Math.ceil(length / 8));
     }
 
     get(i) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/lib/types.ts
----------------------------------------------------------------------
diff --git a/js/lib/types.ts b/js/lib/types.ts
index bbc7558..d656c6a 100644
--- a/js/lib/types.ts
+++ b/js/lib/types.ts
@@ -18,7 +18,8 @@
 import { BitArray } from './bitarray';
 import { TextDecoder } from 'text-encoding';
 import { org } from './Arrow_generated';
-var arrow = org.apache.arrow;
+
+var Type = org.apache.arrow.flatbuf.Type;
 
 interface ArrayView {
     slice(start: number, end: number) : ArrayView
@@ -26,72 +27,90 @@ interface ArrayView {
 }
 
 export abstract class Vector {
+    field: any;
     name: string;
     length: number;
     null_count: number;
-    constructor(name: string) {
-        this.name = name;
+
+    constructor(field) {
+        this.field = field;
+        this.name = field.name();
     }
+
     /* Access datum at index i */
     abstract get(i);
     /* Return array representing data in the range [start, end) */
     abstract slice(start: number, end: number);
-
-    /* Use recordBatch fieldNodes and Buffers to construct this Vector */
-    public loadData(recordBatch: any, buffer: any, bufReader: any, baseOffset: any) {
-        var fieldNode = recordBatch.nodes(bufReader.node_index);
-        this.length = fieldNode.length();
-        this.null_count = fieldNode.length();
-        bufReader.node_index += 1|0;
-
-        this.loadBuffers(recordBatch, buffer, bufReader, baseOffset);
-    }
-
-    protected abstract loadBuffers(recordBatch: any, buffer: any, bufReader: any, baseOffset: any);
-
-    /* Helper function for loading a VALIDITY buffer (for Nullable types) */
-    static loadValidityBuffer(recordBatch, buffer, bufReader, baseOffset) : BitArray {
-        var buf_meta = recordBatch.buffers(bufReader.index);
-        var offset = baseOffset + buf_meta.offset().low;
-        var length = buf_meta.length().low;
-        bufReader.index += 1|0;
-        return new BitArray(buffer, offset, length*8);
-    }
-
-    /* Helper function for loading an OFFSET buffer */
-    static loadOffsetBuffer(recordBatch, buffer, bufReader, baseOffset) : Int32Array {
-        var buf_meta = recordBatch.buffers(bufReader.index);
-        var offset = baseOffset + buf_meta.offset().low;
-        var length = buf_meta.length().low/Int32Array.BYTES_PER_ELEMENT;
-        bufReader.index += 1|0;
-        return new Int32Array(buffer, offset, length);
+    /* Return array of child vectors, for container types */
+    abstract getChildVectors();
+
+    /**
+     * Use recordBatch fieldNodes and Buffers to construct this Vector
+     *   bb: flatbuffers.ByteBuffer
+     *   node: org.apache.arrow.flatbuf.FieldNode
+     *   buffers: { offset: number, length: number }[]
+     */
+    public loadData(bb, node, buffers) {
+        this.length = node.length().low;
+        this.null_count = node.nullCount().low;
+        this.loadBuffers(bb, node, buffers);
+    }
+
+    protected abstract loadBuffers(bb, node, buffers);
+
+    /**
+     * Helper function for loading a VALIDITY buffer (for Nullable types)
+     *   bb: flatbuffers.ByteBuffer
+     *   buffer: org.apache.arrow.flatbuf.Buffer
+     */
+    static loadValidityBuffer(bb, buffer) : BitArray {
+        var arrayBuffer = bb.bytes_.buffer;
+        var offset = bb.bytes_.byteOffset + buffer.offset;
+        return new BitArray(arrayBuffer, offset, buffer.length * 8);
+    }
+
+    /**
+     * Helper function for loading an OFFSET buffer
+     *   buffer: org.apache.arrow.flatbuf.Buffer
+     */
+    static loadOffsetBuffer(bb, buffer) : Int32Array {
+        var arrayBuffer = bb.bytes_.buffer;
+        var offset  = bb.bytes_.byteOffset + buffer.offset;
+        var length = buffer.length / Int32Array.BYTES_PER_ELEMENT;
+        return new Int32Array(arrayBuffer, offset, length);
     }
 
 }
 
 class SimpleVector<T extends ArrayView> extends Vector {
     protected dataView: T;
-    private TypedArray: {new(buffer: any, offset: number, length: number) : T, BYTES_PER_ELEMENT: number};
+    private TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number };
 
-    constructor (TypedArray: {new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number}, name: string) {
-        super(name);
+    constructor (field, TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number }) {
+        super(field);
         this.TypedArray = TypedArray;
     }
 
+    getChildVectors() {
+        return [];
+    }
+
     get(i) {
         return this.dataView[i];
     }
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        this.dataView = this.loadDataBuffer(recordBatch, buffer, bufReader, baseOffset);
+    loadBuffers(bb, node, buffers) {
+        this.loadDataBuffer(bb, buffers[0]);
     }
 
-    loadDataBuffer(recordBatch, buffer, bufReader, baseOffset) : T {
-        var buf_meta = recordBatch.buffers(bufReader.index);
-        var offset = baseOffset + buf_meta.offset().low;
-        var length = buf_meta.length().low/this.TypedArray.BYTES_PER_ELEMENT;
-        bufReader.index += 1|0;
-        return new this.TypedArray(buffer, offset, length);
+    /**
+      * buffer: org.apache.arrow.flatbuf.Buffer
+      */
+    protected loadDataBuffer(bb, buffer) {
+        var arrayBuffer = bb.bytes_.buffer;
+        var offset  = bb.bytes_.byteOffset + buffer.offset;
+        var length = buffer.length / this.TypedArray.BYTES_PER_ELEMENT;
+        this.dataView = new this.TypedArray(arrayBuffer, offset, length);
     }
 
     getDataView() {
@@ -108,77 +127,173 @@ class SimpleVector<T extends ArrayView> extends Vector {
 }
 
 class NullableSimpleVector<T extends ArrayView> extends SimpleVector<T> {
-    private validityView: BitArray;
+
+    protected validityView: BitArray;
+
+    get(i: number) {
+        if (this.validityView.get(i)) {
+            return this.dataView[i];
+        } else {
+          return null;
+        }
+    }
+
+    loadBuffers(bb, node, buffers) {
+        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
+        this.loadDataBuffer(bb, buffers[1]);
+    }
+
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+class Uint8Vector   extends SimpleVector<Uint8Array>   { constructor(field) { super(field, Uint8Array);   }; }
+class Uint16Vector  extends SimpleVector<Uint16Array>  { constructor(field) { super(field, Uint16Array);  }; }
+class Uint32Vector  extends SimpleVector<Uint32Array>  { constructor(field) { super(field, Uint32Array);  }; }
+class Int8Vector    extends SimpleVector<Int8Array>    { constructor(field) { super(field, Int8Array);    }; } // signed 8-bit view, so negative values read back correctly
+class Int16Vector   extends SimpleVector<Int16Array>   { constructor(field) { super(field, Int16Array);   }; } // signed 16-bit view, so negative values read back correctly
+class Int32Vector   extends SimpleVector<Int32Array>   { constructor(field) { super(field, Int32Array);   }; } // signed 32-bit view, so negative values read back correctly
+class Float32Vector extends SimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; }
+class Float64Vector extends SimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; }
+
+class NullableUint8Vector   extends NullableSimpleVector<Uint8Array>   { constructor(field) { super(field, Uint8Array);   }; }
+class NullableUint16Vector  extends NullableSimpleVector<Uint16Array>  { constructor(field) { super(field, Uint16Array);  }; }
+class NullableUint32Vector  extends NullableSimpleVector<Uint32Array>  { constructor(field) { super(field, Uint32Array);  }; }
+class NullableInt8Vector    extends NullableSimpleVector<Int8Array>    { constructor(field) { super(field, Int8Array);    }; } // signed 8-bit view, so negative values read back correctly
+class NullableInt16Vector   extends NullableSimpleVector<Int16Array>   { constructor(field) { super(field, Int16Array);   }; } // signed 16-bit view, so negative values read back correctly
+class NullableInt32Vector   extends NullableSimpleVector<Int32Array>   { constructor(field) { super(field, Int32Array);   }; } // signed 32-bit view, so negative values read back correctly
+class NullableFloat32Vector extends NullableSimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; }
+class NullableFloat64Vector extends NullableSimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; }
+
+class Uint64Vector extends SimpleVector<Uint32Array>  {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
 
     get(i: number) {
-        if (this.validityView.get(i)) return this.dataView[i];
-        else                          return null
+        return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
     }
+}
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        this.validityView = Vector.loadValidityBuffer(recordBatch, buffer, bufReader, baseOffset);
-        this.dataView = this.loadDataBuffer(recordBatch, buffer, bufReader, baseOffset);
+class NullableUint64Vector extends NullableSimpleVector<Uint32Array>  {
+    constructor(field) {
+        super(field, Uint32Array);
     }
 
+    get(i: number) {
+        if (this.validityView.get(i)) {
+            return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
+        } else {
+          return null;
+        }
+    }
 }
 
-class Uint8Vector   extends SimpleVector<Uint8Array>   { constructor(name: string) { super(Uint8Array,  name);  }; }
-class Uint16Vector  extends SimpleVector<Uint16Array>  { constructor(name: string) { super(Uint16Array, name);  }; }
-class Uint32Vector  extends SimpleVector<Uint32Array>  { constructor(name: string) { super(Uint32Array, name);  }; }
-class Int8Vector    extends SimpleVector<Uint8Array>   { constructor(name: string) { super(Uint8Array,  name);  }; }
-class Int16Vector   extends SimpleVector<Uint16Array>  { constructor(name: string) { super(Uint16Array, name);  }; }
-class Int32Vector   extends SimpleVector<Uint32Array>  { constructor(name: string) { super(Uint32Array, name);  }; }
-class Float32Vector extends SimpleVector<Float32Array> { constructor(name: string) { super(Float32Array, name); }; }
-class Float64Vector extends SimpleVector<Float64Array> { constructor(name: string) { super(Float64Array, name); }; }
-
-class NullableUint8Vector   extends NullableSimpleVector<Uint8Array>   { constructor(name: string) { super(Uint8Array,  name);  }; }
-class NullableUint16Vector  extends NullableSimpleVector<Uint16Array>  { constructor(name: string) { super(Uint16Array, name);  }; }
-class NullableUint32Vector  extends NullableSimpleVector<Uint32Array>  { constructor(name: string) { super(Uint32Array, name);  }; }
-class NullableInt8Vector    extends NullableSimpleVector<Uint8Array>   { constructor(name: string) { super(Uint8Array,  name);  }; }
-class NullableInt16Vector   extends NullableSimpleVector<Uint16Array>  { constructor(name: string) { super(Uint16Array, name);  }; }
-class NullableInt32Vector   extends NullableSimpleVector<Uint32Array>  { constructor(name: string) { super(Uint32Array, name);  }; }
-class NullableFloat32Vector extends NullableSimpleVector<Float32Array> { constructor(name: string) { super(Float32Array, name); }; }
-class NullableFloat64Vector extends NullableSimpleVector<Float64Array> { constructor(name: string) { super(Float64Array, name); }; }
+class Int64Vector extends SimpleVector<Uint32Array>  { // non-nullable: data comes from buffers[0], matching Uint64Vector
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    get(i: number) { // each 64-bit value spans two 32-bit words: low at 2*i, high at 2*i + 1
+        return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
+    }
+}
+
+class NullableInt64Vector extends NullableSimpleVector<Uint32Array>  {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    get(i: number) {
+        if (this.validityView.get(i)) {
+            return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
+        } else {
+          return null;
+        }
+    }
+}
+
+class DateVector extends SimpleVector<Uint32Array> {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    get (i) {
+        return new Date(super.get(2*i+1)*Math.pow(2,32) + super.get(2*i));
+    }
+}
+
+class NullableDateVector extends DateVector {
+    private validityView: BitArray;
+
+    loadBuffers(bb, node, buffers) {
+        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
+        this.loadDataBuffer(bb, buffers[1]);
+    }
+
+    get (i) {
+        if (this.validityView.get(i)) {
+            return super.get(i);
+        } else {
+            return null;
+        }
+    }
+
+    getValidityVector() {
+        return this.validityView;
+    }
+}
 
 class Utf8Vector extends SimpleVector<Uint8Array> {
     protected offsetView: Int32Array;
     static decoder: TextDecoder = new TextDecoder('utf8');
 
-    constructor(name: string) {
-        super(Uint8Array, name);
+    constructor(field) {
+        super(field, Uint8Array);
     }
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        this.offsetView = Vector.loadOffsetBuffer(recordBatch, buffer, bufReader, baseOffset);
-        this.dataView = this.loadDataBuffer(recordBatch, buffer, bufReader, baseOffset);
+    loadBuffers(bb, node, buffers) {
+        this.offsetView = Vector.loadOffsetBuffer(bb, buffers[0]);
+        this.loadDataBuffer(bb, buffers[1]);
     }
 
     get(i) {
-        return Utf8Vector.decoder.decode
-            (this.dataView.slice(this.offsetView[i], this.offsetView[i + 1]));
+        return Utf8Vector.decoder.decode(this.dataView.slice(this.offsetView[i], this.offsetView[i + 1]));
     }
 
     slice(start: number, end: number) {
-        var rtrn: string[] = [];
+        var result: string[] = [];
         for (var i: number = start; i < end; i += 1|0) {
-            rtrn.push(this.get(i));
+            result.push(this.get(i));
         }
-        return rtrn;
+        return result;
+    }
+
+    getOffsetView() {
+        return this.offsetView;
     }
 }
 
 class NullableUtf8Vector extends Utf8Vector {
     private validityView: BitArray;
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        this.validityView = Vector.loadValidityBuffer(recordBatch, buffer, bufReader, baseOffset);
-        this.offsetView = Vector.loadOffsetBuffer(recordBatch, buffer, bufReader, baseOffset);
-        this.dataView = this.loadDataBuffer(recordBatch, buffer, bufReader, baseOffset);
+    loadBuffers(bb, node, buffers) {
+        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
+        this.offsetView = Vector.loadOffsetBuffer(bb, buffers[1]);
+        this.loadDataBuffer(bb, buffers[2]);
     }
 
     get(i) {
-        if (!this.validityView.get(i)) return null;
-        return super.get(i);
+        if (this.validityView.get(i)) {
+            return super.get(i);
+        } else {
+            return null;
+        }
+    }
+
+    getValidityVector() {
+        return this.validityView;
     }
 }
 
@@ -186,14 +301,17 @@ class NullableUtf8Vector extends Utf8Vector {
 class ListVector extends Uint32Vector {
     private dataVector: Vector;
 
-    constructor(name, dataVector : Vector) {
-        super(name);
+    constructor(field, dataVector: Vector) {
+        super(field);
         this.dataVector = dataVector;
     }
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        super.loadBuffers(recordBatch, buffer, bufReader, baseOffset);
-        this.dataVector.loadData(recordBatch, buffer, bufReader, baseOffset);
+    getChildVectors() {
+        return [this.dataVector];
+    }
+
+    loadBuffers(bb, node, buffers) {
+        super.loadBuffers(bb, node, buffers);
         this.length -= 1;
     }
 
@@ -210,119 +328,262 @@ class ListVector extends Uint32Vector {
         return "length: " + (this.length);
     }
 
-    slice(start : number, end : number) { return []; };
+    slice(start: number, end: number) {
+        var result = [];
+        for (var i = start; i < end; i += 1|0) {
+            result.push(this.get(i));
+        }
+        return result;
+    }
 }
 
 class NullableListVector extends ListVector {
     private validityView: BitArray;
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        this.validityView = Vector.loadValidityBuffer(recordBatch, buffer, bufReader, baseOffset);
-        super.loadBuffers(recordBatch, buffer, bufReader, baseOffset);
+    loadBuffers(bb, node, buffers) {
+        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
+        this.loadDataBuffer(bb, buffers[1]);
+        this.length -= 1;
     }
 
     get(i) {
-        if (!this.validityView.get(i)) return null;
-        return super.get(i);
+        if (this.validityView.get(i)) {
+            return super.get(i);
+        } else {
+            return null;
+        }
+    }
+
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+class FixedSizeListVector extends Vector {
+    private size: number
+    private dataVector: Vector;
+
+    constructor(field, size: number, dataVector: Vector) {
+        super(field);
+        this.size = size;
+        this.dataVector = dataVector;
+    }
+
+    getChildVectors() {
+        return [this.dataVector];
+    }
+
+    loadBuffers(bb, node, buffers) {
+        // no buffers to load
+    }
+
+    get(i: number) {
+        return this.dataVector.slice(i * this.size, (i + 1) * this.size);
+    }
+
+    slice(start : number, end : number) {
+        var result = [];
+        for (var i = start; i < end; i += 1|0) {
+            result.push(this.get(i));
+        }
+        return result;
+    }
+
+    getListSize() {
+        return this.size;
+    }
+}
+
+class NullableFixedSizeListVector extends FixedSizeListVector {
+    private validityView: BitArray;
+
+    loadBuffers(bb, node, buffers) {
+        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
+    }
+
+    get(i: number) {
+        if (this.validityView.get(i)) {
+            return super.get(i);
+        } else {
+            return null;
+        }
+    }
+
+    getValidityVector() {
+        return this.validityView;
     }
 }
 
 class StructVector extends Vector {
     private validityView: BitArray;
-    private vectors : Vector[];
-    constructor(name: string, vectors: Vector[]) {
-        super(name);
+    private vectors: Vector[];
+
+    constructor(field, vectors: Vector[]) {
+        super(field);
         this.vectors = vectors;
     }
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        this.validityView = Vector.loadValidityBuffer(recordBatch, buffer, bufReader, baseOffset);
-        this.vectors.forEach((v: Vector) => v.loadData(recordBatch, buffer, bufReader, baseOffset));
+    getChildVectors() {
+        return this.vectors;
+    }
+
+    loadBuffers(bb, node, buffers) {
+        this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
     }
 
     get(i : number) {
-        if (!this.validityView.get(i)) return null;
-        return this.vectors.map((v: Vector) => v.get(i));
+        if (this.validityView.get(i)) {
+          return this.vectors.map((v: Vector) => v.get(i));
+        } else {
+            return null;
+        }
     }
 
     slice(start : number, end : number) {
-        var rtrn = [];
-        for (var i: number = start; i < end; i += 1|0) {
-            rtrn.push(this.get(i));
+        var result = [];
+        for (var i = start; i < end; i += 1|0) {
+            result.push(this.get(i));
         }
-        return rtrn;
+        return result;
+    }
+
+    getValidityVector() {
+        return this.validityView;
     }
 }
 
-class DateVector extends SimpleVector<Uint32Array> {
-    constructor (name: string) {
-        super(Uint32Array, name);
+class DictionaryVector extends Vector {
+
+    private indices: Vector;
+    private dictionary: Vector;
+
+    constructor (field, indices: Vector, dictionary: Vector) {
+        super(field);
+        this.indices = indices;
+        this.dictionary = dictionary;
     }
 
-    get (i) {
-        return new Date(super.get(2*i+1)*Math.pow(2,32) + super.get(2*i));
+    get(i) {
+        var encoded = this.indices.get(i);
+        if (encoded == null) {
+            return null;
+        } else {
+            return this.dictionary.get(encoded);
+        }
     }
-}
 
-class NullableDateVector extends DateVector {
-    private validityView: BitArray;
+    /** Get the dictionary encoded value */
+    public getEncoded(i) {
+        return this.indices.get(i);
+    }
 
-    loadBuffers(recordBatch, buffer, bufReader, baseOffset) {
-        this.validityView = Vector.loadValidityBuffer(recordBatch, buffer, bufReader, baseOffset);
-        super.loadBuffers(recordBatch, buffer, bufReader, baseOffset);
+    slice(start, end) {
+        return this.indices.slice(start, end); // TODO decode
     }
 
-    get (i) {
-        if (!this.validityView.get(i)) return null;
-        return super.get(i);
+    getChildVectors() {
+        return this.indices.getChildVectors();
+    }
+
+    loadBuffers(bb, node, buffers) {
+        this.indices.loadData(bb, node, buffers);
+    }
+
+    /** Get the index (encoded) vector */
+    public getIndexVector() {
+        return this.indices;
+    }
+
+    /** Get the dictionary vector */
+    public getDictionaryVector() {
+        return this.dictionary;
+    }
+
+    toString() {
+        return this.indices.toString();
     }
 }
 
-var BASIC_TYPES = [arrow.flatbuf.Type.Int, arrow.flatbuf.Type.FloatingPoint, arrow.flatbuf.Type.Utf8, arrow.flatbuf.Type.Date];
-
-export function vectorFromField(field) : Vector {
-    var typeType = field.typeType();
-    if (BASIC_TYPES.indexOf(typeType) >= 0) {
-        var type = field.typeType();
-        if (type === arrow.flatbuf.Type.Int) {
-            type = field.type(new arrow.flatbuf.Int());
-            var VectorConstructor : {new(string) : Vector};
-            if (type.isSigned()) {
-                if (type.bitWidth() == 32)
-                    VectorConstructor = field.nullable() ? NullableInt32Vector : Int32Vector;
-                else if (type.bitWidth() == 16)
-                    VectorConstructor = field.nullable() ? NullableInt16Vector : Int16Vector;
-                else if (type.bitWidth() == 8)
-                    VectorConstructor = field.nullable() ? NullableInt8Vector : Int8Vector;
+export function vectorFromField(field, dictionaries) : Vector {
+    var dictionary = field.dictionary(), nullable = field.nullable();
+    if (dictionary == null) {
+        var typeType = field.typeType();
+        if (typeType === Type.List) {
+            var dataVector = vectorFromField(field.children(0), dictionaries);
+            return nullable ? new NullableListVector(field, dataVector) : new ListVector(field, dataVector);
+        } else if (typeType === Type.FixedSizeList) {
+            var dataVector = vectorFromField(field.children(0), dictionaries);
+            var size = field.type(new org.apache.arrow.flatbuf.FixedSizeList()).listSize();
+            if (nullable) {
+              return new NullableFixedSizeListVector(field, size, dataVector);
             } else {
-                if (type.bitWidth() == 32)
-                    VectorConstructor = field.nullable() ? NullableUint32Vector : Uint32Vector;
-                else if (type.bitWidth() == 16)
-                    VectorConstructor = field.nullable() ? NullableUint16Vector : Uint16Vector;
-                else if (type.bitWidth() == 8)
-                    VectorConstructor = field.nullable() ? NullableUint8Vector : Uint8Vector;
+              return new FixedSizeListVector(field, size, dataVector);
+            }
+         } else if (typeType === Type.Struct_) {
+            var vectors : Vector[] = [];
+            for (var i : number = 0; i < field.childrenLength(); i += 1|0) {
+                vectors.push(vectorFromField(field.children(i), dictionaries));
+            }
+            return new StructVector(field, vectors);
+        } else {
+            if (typeType === Type.Int) {
+                var type = field.type(new org.apache.arrow.flatbuf.Int());
+                return _createIntVector(field, type.bitWidth(), type.isSigned(), nullable)
+            } else if (typeType === Type.FloatingPoint) {
+                var precision = field.type(new org.apache.arrow.flatbuf.FloatingPoint()).precision();
+                if (precision == org.apache.arrow.flatbuf.Precision.SINGLE) {
+                    return nullable ? new NullableFloat32Vector(field) : new Float32Vector(field);
+                } else if (precision == org.apache.arrow.flatbuf.Precision.DOUBLE) {
+                    return nullable ? new NullableFloat64Vector(field) : new Float64Vector(field);
+                } else {
+                    throw "Unimplemented FloatingPoint precision " + precision;
+                }
+            } else if (typeType === Type.Utf8) {
+                return nullable ? new NullableUtf8Vector(field) : new Utf8Vector(field);
+            } else if (typeType === Type.Date) {
+                return nullable ? new NullableDateVector(field) : new DateVector(field);
+            } else {
+                throw "Unimplemented type " + typeType;
             }
-        } else if (type === arrow.flatbuf.Type.FloatingPoint) {
-            type = field.type(new arrow.flatbuf.FloatingPoint());
-            if (type.precision() == arrow.flatbuf.Precision.SINGLE)
-                VectorConstructor = field.nullable() ? NullableFloat32Vector : Float32Vector;
-            else if (type.precision() == arrow.flatbuf.Precision.DOUBLE)
-                VectorConstructor = field.nullable() ? NullableFloat64Vector : Float64Vector;
-        } else if (type === arrow.flatbuf.Type.Utf8) {
-            VectorConstructor = field.nullable() ? NullableUtf8Vector : Utf8Vector;
-        } else if (type === arrow.flatbuf.Type.Date) {
-            VectorConstructor = field.nullable() ? NullableDateVector : DateVector;
         }
+    } else {
+        // determine arrow type - default is signed 32 bit int
+        var type = dictionary.indexType(), bitWidth = 32, signed = true;
+        if (type != null) {
+            bitWidth = type.bitWidth();
+            signed = type.isSigned();
+        }
+        var indices = _createIntVector(field, bitWidth, signed, nullable);
+        return new DictionaryVector(field, indices, dictionaries[dictionary.id().toFloat64().toString()]);
+    }
+}
 
-        return new VectorConstructor(field.name());
-    } else if (typeType === arrow.flatbuf.Type.List) {
-        var dataVector = vectorFromField(field.children(0));
-        return field.nullable() ? new NullableListVector(field.name(), dataVector) : new ListVector(field.name(), dataVector);
-    } else if (typeType === arrow.flatbuf.Type.Struct_) {
-        var vectors : Vector[] = [];
-        for (var i : number = 0; i < field.childrenLength(); i += 1|0) {
-            vectors.push(vectorFromField(field.children(i)));
+function _createIntVector(field, bitWidth, signed, nullable) {
+    if (bitWidth == 64) {
+        if (signed) {
+            return nullable ? new NullableInt64Vector(field) : new Int64Vector(field);
+        } else {
+            return nullable ? new NullableUint64Vector(field) : new Uint64Vector(field);
+        }
+    } else if (bitWidth == 32) {
+        if (signed) {
+            return nullable ? new NullableInt32Vector(field) : new Int32Vector(field);
+        } else {
+            return nullable ? new NullableUint32Vector(field) : new Uint32Vector(field);
+        }
+    } else if (bitWidth == 16) {
+        if (signed) {
+            return nullable ? new NullableInt16Vector(field) : new Int16Vector(field);
+        } else {
+            return nullable ? new NullableUint16Vector(field) : new Uint16Vector(field);
+        }
+    } else if (bitWidth == 8) {
+        if (signed) {
+            return nullable ? new NullableInt8Vector(field) : new Int8Vector(field);
+        } else {
+            return nullable ? new NullableUint8Vector(field) : new Uint8Vector(field);
         }
-        return new StructVector(field.name(), vectors);
+    } else {
+         throw "Unimplemented Int bit width " + bitWidth;
     }
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/package.json
----------------------------------------------------------------------
diff --git a/js/package.json b/js/package.json
index b1e583b..8687f50 100644
--- a/js/package.json
+++ b/js/package.json
@@ -4,16 +4,20 @@
   "description": "",
   "main": "dist/arrow.js",
   "scripts": {
-    "postinstall": "./postinstall.sh",
-    "test": "echo \"Error: no test specified\" && exit 1"
+    "postinstall": "./flatbuffers.sh",
+    "build": "./flatbuffers.sh && tsc && webpack",
+    "test": "./node_modules/mocha/bin/mocha ./spec/arrow.js"
   },
   "author": "",
   "license": "Apache-2.0",
   "devDependencies": {
-    "flatbuffers": "^1.5.0",
-    "text-encoding": "^0.6.4"
+    "chai": "^3.5.0",
+    "mocha": "^3.3.0",
+    "webpack": "^2.3.3"
   },
   "dependencies": {
+    "flatbuffers": "^1.5.0",
+    "text-encoding": "^0.6.4",
     "commander": "^2.9.0"
   }
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/postinstall.sh
----------------------------------------------------------------------
diff --git a/js/postinstall.sh b/js/postinstall.sh
deleted file mode 100755
index 1e6622f..0000000
--- a/js/postinstall.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
-
-echo "Compiling flatbuffer schemas..."
-#flatc -o lib --js ../format/Message.fbs ../format/File.fbs
-flatc -o lib --js ../format/*.fbs
-cat lib/*_generated.js > lib/Arrow_generated.js

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/spec/arrow.js
----------------------------------------------------------------------
diff --git a/js/spec/arrow.js b/js/spec/arrow.js
new file mode 100644
index 0000000..61a6f81
--- /dev/null
+++ b/js/spec/arrow.js
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+var fs = require('fs');
+var chai = require('chai');
+var assert = chai.assert;
+var path= require('path');
+var arrow = require('../dist/arrow.js');
+
+test_files = [
+  {
+    name: 'simple',
+    batches: 1,
+    fields: [
+      {
+        "name": "foo",
+        "type": "Int",
+        "data": [[1, null, 3, 4, 5]]
+      },
+      {
+        "name": "bar",
+        "type": "FloatingPoint",
+        "data": [[1.0, null, null, 4.0, 5.0]]
+      },
+      {
+        "name": "baz",
+        "type": "Utf8",
+        "data": [["aa", null, null, "bbb", "cccc"]]
+      }
+    ]
+  },
+  {
+    name: 'struct_example',
+    batches: 2,
+    fields: [
+      {
+        "name": "struct_nullable",
+        "type": "Struct",
+        "data": [
+          [
+            null,
+            [null, 'MhRNxD4'],
+            [137773603, '3F9HBxK'],
+            [410361374, 'aVd88fp'],
+            null,
+            [null, '3loZrRf'],
+            null
+          ], [
+            null,
+            [null,null],
+            [null,null],
+            null,
+            [null, '78SLiRw'],
+            null,
+            null,
+            [null, '0ilsf82'],
+            [null, 'LjS9MbU'],
+            [null, null],
+          ]
+        ]
+      }
+    ]
+  },
+  {
+    name: 'dictionary',
+    batches: 2,
+    fields: [
+      {
+        "name": "example-csv",
+        "type": "Struct",
+        "data": [
+          [
+            ["Hermione", 25, new Float32Array([-53.235599517822266, 40.231998443603516])],
+            ["Severus", 30, new Float32Array([-62.22999954223633, 3])],
+          ], [
+            ["Harry", 20, new Float32Array([23, -100.23652648925781])]
+          ]
+        ]
+      }
+    ]
+  },
+];
+
+var buf;
+
+function makeSchemaChecks(fields) {
+  describe('schema', function () {
+    var schema;
+    beforeEach(function () {
+      schema = arrow.getSchema(buf);
+    });
+
+    it('should read the number of fields', function () {
+        assert.lengthOf(schema, fields.length);
+    });
+
+    it("should understand fields", function () {
+      for (i = 0; i < fields.length; i += 1|0) {
+          assert.equal(schema[i].name, fields[i].name);
+          assert.equal(schema[i].type, fields[i].type,
+                       'bad type for field ' + schema[i].name);
+      }
+    });
+  });
+}
+
+function makeDataChecks (batches, fields) {
+  describe('data', function() {
+    var reader;
+    beforeEach(function () {
+        reader = arrow.getReader(buf)
+    });
+    it('should read the correct number of record batches', function () {
+        assert.equal(reader.getBatchCount(), batches);
+    });
+    fields.forEach(function (field, i) {
+      it('should read ' + field.type + ' vector ' + field.name, function () {
+        for (var batch_idx = 0; batch_idx < batches; batch_idx += 1|0) {
+          reader.loadNextBatch();
+          var batch = field.data[batch_idx];
+          var vector = reader.getVector(field.name)
+          assert.isDefined(vector, "vector " + field.name);
+          assert.lengthOf(vector, batch.length, "vector " + field.name)
+          for (i = 0; i < vector.length; i += 1|0) {
+            if (field.type == "Date") {
+              assert.equal(vector.get(i).getTime(), batch[i].getTime(),
+                           "vector " + field.name + " index " + i);
+            } else {
+              assert.deepEqual(vector.get(i), batch[i],
+                               "vector " + field.name + " index " + i);
+            }
+          }
+        }
+      });
+    });
+  });
+}
+
+describe('arrow random-access file', function () {
+  test_files.forEach(function (test_file) {
+    describe(test_file.name, function () {
+      var fields = test_file.fields
+      beforeEach(function () {
+        buf = fs.readFileSync(path.resolve(__dirname, test_file.name + '.arrow'));
+      });
+
+      makeSchemaChecks(fields);
+      makeDataChecks(test_file.batches, fields);
+    })
+  });
+});
+
+describe('arrow streaming file format', function () {
+  test_files.forEach(function (test_file) {
+    describe(test_file.name, function () {
+      var fields = test_file.fields
+      beforeEach(function () {
+        buf = fs.readFileSync(path.resolve(__dirname, test_file.name + '-stream.arrow'));
+      });
+
+      makeSchemaChecks(fields);
+      makeDataChecks(test_file.batches, fields);
+    })
+  });
+});

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/spec/dictionary-stream.arrow
----------------------------------------------------------------------
diff --git a/js/spec/dictionary-stream.arrow b/js/spec/dictionary-stream.arrow
new file mode 100644
index 0000000..17ca48b
Binary files /dev/null and b/js/spec/dictionary-stream.arrow differ

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/spec/dictionary.arrow
----------------------------------------------------------------------
diff --git a/js/spec/dictionary.arrow b/js/spec/dictionary.arrow
new file mode 100644
index 0000000..34d41db
Binary files /dev/null and b/js/spec/dictionary.arrow differ

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/spec/simple-stream.arrow
----------------------------------------------------------------------
diff --git a/js/spec/simple-stream.arrow b/js/spec/simple-stream.arrow
new file mode 100644
index 0000000..2c68c0e
Binary files /dev/null and b/js/spec/simple-stream.arrow differ

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/spec/simple.arrow
----------------------------------------------------------------------
diff --git a/js/spec/simple.arrow b/js/spec/simple.arrow
new file mode 100644
index 0000000..838db6d
Binary files /dev/null and b/js/spec/simple.arrow differ

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/spec/struct_example-stream.arrow
----------------------------------------------------------------------
diff --git a/js/spec/struct_example-stream.arrow b/js/spec/struct_example-stream.arrow
new file mode 100644
index 0000000..4e97b70
Binary files /dev/null and b/js/spec/struct_example-stream.arrow differ

http://git-wip-us.apache.org/repos/asf/arrow/blob/22c738cc/js/spec/struct_example.arrow
----------------------------------------------------------------------
diff --git a/js/spec/struct_example.arrow b/js/spec/struct_example.arrow
new file mode 100644
index 0000000..3d2c018
Binary files /dev/null and b/js/spec/struct_example.arrow differ


Mime
View raw message