arrow-commits mailing list archives

From w...@apache.org
Subject [1/2] arrow git commit: ARROW-222: Prototyping an IO interface for Arrow, with initial HDFS target
Date Fri, 24 Jun 2016 23:55:36 GMT
Repository: arrow
Updated Branches:
  refs/heads/master f7ade7bfe -> ef9083029


http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/cpp/thirdparty/hadoop/include/hdfs.h
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/hadoop/include/hdfs.h b/cpp/thirdparty/hadoop/include/hdfs.h
new file mode 100644
index 0000000..a4df6ae
--- /dev/null
+++ b/cpp/thirdparty/hadoop/include/hdfs.h
@@ -0,0 +1,1024 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFS_HDFS_H
+#define LIBHDFS_HDFS_H
+
+#include <errno.h> /* for EINTERNAL, etc. */
+#include <fcntl.h> /* for O_RDONLY, O_WRONLY */
+#include <stdint.h> /* for uint64_t, etc. */
+#include <time.h> /* for time_t */
+
+/*
+ * Support export of DLL symbols during libhdfs build, and import of DLL symbols
+ * during client application build.  A client application may optionally define
+ * symbol LIBHDFS_DLL_IMPORT in its build.  This is not strictly required, but
+ * the compiler can produce more efficient code with it.
+ */
+#ifdef WIN32
+    #ifdef LIBHDFS_DLL_EXPORT
+        #define LIBHDFS_EXTERNAL __declspec(dllexport)
+    #elif LIBHDFS_DLL_IMPORT
+        #define LIBHDFS_EXTERNAL __declspec(dllimport)
+    #else
+        #define LIBHDFS_EXTERNAL
+    #endif
+#else
+    #ifdef LIBHDFS_DLL_EXPORT
+        #define LIBHDFS_EXTERNAL __attribute__((visibility("default")))
+    #elif LIBHDFS_DLL_IMPORT
+        #define LIBHDFS_EXTERNAL __attribute__((visibility("default")))
+    #else
+        #define LIBHDFS_EXTERNAL
+    #endif
+#endif
+
+#ifndef O_RDONLY
+#define O_RDONLY 1
+#endif
+
+#ifndef O_WRONLY
+#define O_WRONLY 2
+#endif
+
+#ifndef EINTERNAL
+#define EINTERNAL 255
+#endif
+
+#define ELASTIC_BYTE_BUFFER_POOL_CLASS \
+  "org/apache/hadoop/io/ElasticByteBufferPool"
+
+/** All APIs set errno to meaningful values */
+
+#ifdef __cplusplus
+extern  "C" {
+#endif
+    /**
+     * Some utility decls used in libhdfs.
+     */
+    struct hdfsBuilder;
+    typedef int32_t   tSize; /// size of data for read/write io ops
+    typedef time_t    tTime; /// time type in seconds
+    typedef int64_t   tOffset;/// offset within the file
+    typedef uint16_t  tPort; /// port
+    typedef enum tObjectKind {
+        kObjectKindFile = 'F',
+        kObjectKindDirectory = 'D',
+    } tObjectKind;
+    struct hdfsStreamBuilder;
+
+
+    /**
+     * The C reflection of org.apache.hadoop.fs.FileSystem.
+     */
+    struct hdfs_internal;
+    typedef struct hdfs_internal* hdfsFS;
+
+    struct hdfsFile_internal;
+    typedef struct hdfsFile_internal* hdfsFile;
+
+    struct hadoopRzOptions;
+
+    struct hadoopRzBuffer;
+
+    /**
+     * Determine if a file is open for read.
+     *
+     * @param file     The HDFS file
+     * @return         1 if the file is open for read; 0 otherwise
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsFileIsOpenForRead(hdfsFile file);
+
+    /**
+     * Determine if a file is open for write.
+     *
+     * @param file     The HDFS file
+     * @return         1 if the file is open for write; 0 otherwise
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsFileIsOpenForWrite(hdfsFile file);
+
+    struct hdfsReadStatistics {
+      uint64_t totalBytesRead;
+      uint64_t totalLocalBytesRead;
+      uint64_t totalShortCircuitBytesRead;
+      uint64_t totalZeroCopyBytesRead;
+    };
+
+    /**
+     * Get read statistics about a file.  This is only applicable to files
+     * opened for reading.
+     *
+     * @param file     The HDFS file
+     * @param stats    (out parameter) on a successful return, the read
+     *                 statistics.  Unchanged otherwise.  You must free the
+     *                 returned statistics with hdfsFileFreeReadStatistics.
+     * @return         0 if the statistics were successfully returned,
+     *                 -1 otherwise.  On a failure, please check errno against
+     *                 ENOTSUP.  webhdfs, LocalFilesystem, and so forth may
+     *                 not support read statistics.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsFileGetReadStatistics(hdfsFile file,
+                                  struct hdfsReadStatistics **stats);
+
+    /**
+     * @param stats    HDFS read statistics for a file.
+     *
+     * @return the number of remote bytes read.
+     */
+    LIBHDFS_EXTERNAL
+    int64_t hdfsReadStatisticsGetRemoteBytesRead(
+                            const struct hdfsReadStatistics *stats);
+
+    /**
+     * Clear the read statistics for a file.
+     *
+     * @param file      The file to clear the read statistics of.
+     *
+     * @return          0 on success; the error code otherwise.
+     *                  EINVAL: the file is not open for reading.
+     *                  ENOTSUP: the file does not support clearing the read
+     *                  statistics.
+     *                  Errno will also be set to this code on failure.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsFileClearReadStatistics(hdfsFile file);
+
+    /**
+     * Free some HDFS read statistics.
+     *
+     * @param stats    The HDFS read statistics to free.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats);
+
+    /**
+     * hdfsConnectAsUser - Connect to an HDFS file system as a specific user.
+     * @param nn   The NameNode.  See hdfsBuilderSetNameNode for details.
+     * @param port The port on which the server is listening.
+     * @param user The user name (a Hadoop domain user).  Passing NULL is
+     *             equivalent to hdfsConnect(host, port).
+     * @return Returns a handle to the filesystem or NULL on error.
+     * @deprecated Use hdfsBuilderConnect instead.
+     */
+     LIBHDFS_EXTERNAL
+     hdfsFS hdfsConnectAsUser(const char* nn, tPort port, const char *user);
+
+    /**
+     * hdfsConnect - Connect to an HDFS file system.
+     * @param nn   The NameNode.  See hdfsBuilderSetNameNode for details.
+     * @param port The port on which the server is listening.
+     * @return Returns a handle to the filesystem or NULL on error.
+     * @deprecated Use hdfsBuilderConnect instead.
+     */
+     LIBHDFS_EXTERNAL
+     hdfsFS hdfsConnect(const char* nn, tPort port);
+
+    /**
+     * hdfsConnectAsUserNewInstance - Connect to an HDFS file system.
+     *
+     * Forces a new instance to be created
+     *
+     * @param nn     The NameNode.  See hdfsBuilderSetNameNode for details.
+     * @param port   The port on which the server is listening.
+     * @param user   The user name to use when connecting
+     * @return       Returns a handle to the filesystem or NULL on error.
+     * @deprecated   Use hdfsBuilderConnect instead.
+     */
+     LIBHDFS_EXTERNAL
+     hdfsFS hdfsConnectAsUserNewInstance(const char* nn, tPort port, const char *user );
+
+    /**
+     * hdfsConnectNewInstance - Connect to an HDFS file system.
+     *
+     * Forces a new instance to be created
+     *
+     * @param nn     The NameNode.  See hdfsBuilderSetNameNode for details.
+     * @param port   The port on which the server is listening.
+     * @return       Returns a handle to the filesystem or NULL on error.
+     * @deprecated   Use hdfsBuilderConnect instead.
+     */
+     LIBHDFS_EXTERNAL
+     hdfsFS hdfsConnectNewInstance(const char* nn, tPort port);
+
+    /**
+     * Connect to HDFS using the parameters defined by the builder.
+     *
+     * The HDFS builder will be freed, whether or not the connection was
+     * successful.
+     *
+     * Every successful call to hdfsBuilderConnect should be matched with a call
+     * to hdfsDisconnect, when the hdfsFS is no longer needed.
+     *
+     * @param bld    The HDFS builder
+     * @return       Returns a handle to the filesystem, or NULL on error.
+     */
+     LIBHDFS_EXTERNAL
+     hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld);
+
+    /**
+     * Create an HDFS builder.
+     *
+     * @return The HDFS builder, or NULL on error.
+     */
+    LIBHDFS_EXTERNAL
+    struct hdfsBuilder *hdfsNewBuilder(void);
+
+    /**
+     * Force the builder to always create a new instance of the FileSystem,
+     * rather than possibly finding one in the cache.
+     *
+     * @param bld The HDFS builder
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld);
+
+    /**
+     * Set the HDFS NameNode to connect to.
+     *
+     * @param bld  The HDFS builder
+     * @param nn   The NameNode to use.
+     *
+     *             If the string given is 'default', the default NameNode
+     *             configuration will be used (from the XML configuration files)
+     *
+     *             If NULL is given, a LocalFileSystem will be created.
+     *
+     *             If the string starts with a protocol type such as file:// or
+     *             hdfs://, this protocol type will be used.  If not, the
+     *             hdfs:// protocol type will be used.
+     *
+     *             You may specify a NameNode port in the usual way by
+     *             passing a string of the format hdfs://<hostname>:<port>.
+     *             Alternately, you may set the port with
+     *             hdfsBuilderSetNameNodePort.  However, you must not pass the
+     *             port in two different ways.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn);
+
+    /**
+     * Set the port of the HDFS NameNode to connect to.
+     *
+     * @param bld The HDFS builder
+     * @param port The port.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port);
+
+    /**
+     * Set the username to use when connecting to the HDFS cluster.
+     *
+     * @param bld The HDFS builder
+     * @param userName The user name.  The string will be shallow-copied.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName);
+
+    /**
+     * Set the path to the Kerberos ticket cache to use when connecting to
+     * the HDFS cluster.
+     *
+     * @param bld The HDFS builder
+     * @param kerbTicketCachePath The Kerberos ticket cache path.  The string
+     *                            will be shallow-copied.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsBuilderSetKerbTicketCachePath(struct hdfsBuilder *bld,
+                                   const char *kerbTicketCachePath);
+
+    /**
+     * Free an HDFS builder.
+     *
+     * It is normally not necessary to call this function since
+     * hdfsBuilderConnect frees the builder.
+     *
+     * @param bld The HDFS builder
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsFreeBuilder(struct hdfsBuilder *bld);
+
+    /**
+     * Set a configuration string for an HdfsBuilder.
+     *
+     * @param key      The key to set.
+     * @param val      The value, or NULL to set no value.
+     *                 This will be shallow-copied.  You are responsible for
+     *                 ensuring that it remains valid until the builder is
+     *                 freed.
+     *
+     * @return         0 on success; nonzero error code otherwise.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key,
+                              const char *val);
+
+    /**
+     * Get a configuration string.
+     *
+     * @param key      The key to find
+     * @param val      (out param) The value.  This will be set to NULL if the
+     *                 key isn't found.  You must free this string with
+     *                 hdfsConfStrFree.
+     *
+     * @return         0 on success; nonzero error code otherwise.
+     *                 Failure to find the key is not an error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsConfGetStr(const char *key, char **val);
+
+    /**
+     * Get a configuration integer.
+     *
+     * @param key      The key to find
+     * @param val      (out param) The value.  This will NOT be changed if the
+     *                 key isn't found.
+     *
+     * @return         0 on success; nonzero error code otherwise.
+     *                 Failure to find the key is not an error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsConfGetInt(const char *key, int32_t *val);
+
+    /**
+     * Free a configuration string found with hdfsConfGetStr.
+     *
+     * @param val      A configuration string obtained from hdfsConfGetStr
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsConfStrFree(char *val);
+
+    /**
+     * hdfsDisconnect - Disconnect from the hdfs file system.
+     * Disconnect from hdfs.
+     * @param fs The configured filesystem handle.
+     * @return Returns 0 on success, -1 on error.
+     *         Even if there is an error, the resources associated with the
+     *         hdfsFS will be freed.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsDisconnect(hdfsFS fs);
+
+    /**
+     * hdfsOpenFile - Open an HDFS file in the given mode.
+     * @deprecated    Use the hdfsStreamBuilder functions instead.
+     * This function does not support setting block sizes bigger than 2 GB.
+     *
+     * @param fs The configured filesystem handle.
+     * @param path The full path to the file.
+     * @param flags A bitwise OR of fcntl.h file flags.  Supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite, i.e. implies O_TRUNC),
+     * or O_WRONLY|O_APPEND.  Other flags are generally ignored, except that O_RDWR and O_EXCL|O_CREAT cause the call to return NULL and set errno to ENOTSUP.
+     * @param bufferSize Size of buffer for read/write - pass 0 if you want
+     * to use the default configured values.
+     * @param replication Block replication - pass 0 if you want to use
+     * the default configured values.
+     * @param blocksize Size of block - pass 0 if you want to use the
+     * default configured values.  Note that if you want a block size bigger
+     * than 2 GB, you must use the hdfsStreamBuilder API rather than this
+     * deprecated function.
+     * @return Returns the handle to the open file or NULL on error.
+     */
+    LIBHDFS_EXTERNAL
+    hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
+                          int bufferSize, short replication, tSize blocksize);
+
+    /**
+     * hdfsStreamBuilderAlloc - Allocate an HDFS stream builder.
+     *
+     * @param fs The configured filesystem handle.
+     * @param path The full path to the file.  Will be deep-copied.
+     * @param flags The open flags, as in hdfsOpenFile.
+     * @return Returns the hdfsStreamBuilder, or NULL on error.
+     */
+    LIBHDFS_EXTERNAL
+    struct hdfsStreamBuilder *hdfsStreamBuilderAlloc(hdfsFS fs,
+                                      const char *path, int flags);
+
+    /**
+     * hdfsStreamBuilderFree - Free an HDFS file builder.
+     *
+     * It is normally not necessary to call this function since
+     * hdfsStreamBuilderBuild frees the builder.
+     *
+     * @param bld The hdfsStreamBuilder to free.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsStreamBuilderFree(struct hdfsStreamBuilder *bld);
+
+    /**
+     * hdfsStreamBuilderSetBufferSize - Set the stream buffer size.
+     *
+     * @param bld The hdfs stream builder.
+     * @param bufferSize The buffer size to set.
+     *
+     * @return 0 on success, or -1 on error.  Errno will be set on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsStreamBuilderSetBufferSize(struct hdfsStreamBuilder *bld,
+                                       int32_t bufferSize);
+
+    /**
+     * hdfsStreamBuilderSetReplication - Set the replication for the stream.
+     * This is only relevant for output streams, which will create new blocks.
+     *
+     * @param bld The hdfs stream builder.
+     * @param replication The replication to set.
+     *
+     * @return 0 on success, or -1 on error.  Errno will be set on error.
+     *              If you call this on an input stream builder, you will get
+     *              EINVAL, because this configuration is not relevant to input
+     *              streams.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsStreamBuilderSetReplication(struct hdfsStreamBuilder *bld,
+                                        int16_t replication);
+
+    /**
+     * hdfsStreamBuilderSetDefaultBlockSize - Set the default block size for
+     * the stream.  This is only relevant for output streams, which will create
+     * new blocks.
+     *
+     * @param bld The hdfs stream builder.
+     * @param defaultBlockSize The default block size to set.
+     *
+     * @return 0 on success, or -1 on error.  Errno will be set on error.
+     *              If you call this on an input stream builder, you will get
+     *              EINVAL, because this configuration is not relevant to input
+     *              streams.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsStreamBuilderSetDefaultBlockSize(struct hdfsStreamBuilder *bld,
+                                       int64_t defaultBlockSize);
+
+    /**
+     * hdfsStreamBuilderBuild - Build the stream by calling open or create.
+     *
+     * @param bld The hdfs stream builder.  This pointer will be freed, whether
+     *            or not the open succeeds.
+     *
+     * @return the stream pointer on success, or NULL on error.  Errno will be
+     * set on error.
+     */
+    LIBHDFS_EXTERNAL
+    hdfsFile hdfsStreamBuilderBuild(struct hdfsStreamBuilder *bld);
+
+    /**
+     * hdfsTruncateFile - Truncate an HDFS file to the given length.
+     * @param fs The configured filesystem handle.
+     * @param path The full path to the file.
+     * @param newlength The size the file is to be truncated to
+     * @return 1 if the file has been truncated to the desired newlength
+     *         and is immediately available to be reused for write operations
+     *         such as append.
+     *         0 if a background process of adjusting the length of the last
+     *         block has been started, and clients should wait for it to
+     *         complete before proceeding with further file updates.
+     *         -1 on error.
+     */
+    int hdfsTruncateFile(hdfsFS fs, const char* path, tOffset newlength);
+
+    /**
+     * hdfsUnbufferFile - Reduce the buffering done on a file.
+     *
+     * @param file  The file to unbuffer.
+     * @return      0 on success
+     *              ENOTSUP if the file does not support unbuffering
+     *              Errno will also be set to this value.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsUnbufferFile(hdfsFile file);
+
+    /**
+     * hdfsCloseFile - Close an open file.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @return Returns 0 on success, -1 on error.
+     *         On error, errno will be set appropriately.
+     *         If the hdfs file was valid, the memory associated with it will
+     *         be freed at the end of this call, even if there was an I/O
+     *         error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsCloseFile(hdfsFS fs, hdfsFile file);
+
+
+    /**
+     * hdfsExists - Checks if a given path exists on the filesystem.
+     * @param fs The configured filesystem handle.
+     * @param path The path to look for
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsExists(hdfsFS fs, const char *path);
+
+
+    /**
+     * hdfsSeek - Seek to given offset in file.
+     * This works only for files opened in read-only mode.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @param desiredPos Offset into the file to seek into.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos);
+
+
+    /**
+     * hdfsTell - Get the current offset in the file, in bytes.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @return Current offset, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    tOffset hdfsTell(hdfsFS fs, hdfsFile file);
+
+
+    /**
+     * hdfsRead - Read data from an open file.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @param buffer The buffer to copy read bytes into.
+     * @param length The length of the buffer.
+     * @return      On success, a positive number indicating how many bytes
+     *              were read.
+     *              On end-of-file, 0.
+     *              On error, -1.  Errno will be set to the error code.
+     *              Just like the POSIX read function, hdfsRead will return -1
+     *              and set errno to EINTR if data is temporarily unavailable,
+     *              but we are not yet at the end of the file.
+     */
+    LIBHDFS_EXTERNAL
+    tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);
+
+    /**
+     * hdfsPread - Positional read of data from an open file.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @param position Position from which to read
+     * @param buffer The buffer to copy read bytes into.
+     * @param length The length of the buffer.
+     * @return      See hdfsRead
+     */
+    LIBHDFS_EXTERNAL
+    tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,
+                    void* buffer, tSize length);
+
+
+    /**
+     * hdfsWrite - Write data into an open file.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @param buffer The data.
+     * @param length The number of bytes to write.
+     * @return Returns the number of bytes written, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,
+                    tSize length);
+
+
+    /**
+     * hdfsFlush - Flush the data.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsFlush(hdfsFS fs, hdfsFile file);
+
+
+    /**
+     * hdfsHFlush - Flush out the data in client's user buffer. After the
+     * return of this call, new readers will see the data.
+     * @param fs configured filesystem handle
+     * @param file file handle
+     * @return 0 on success, -1 on error and sets errno
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsHFlush(hdfsFS fs, hdfsFile file);
+
+
+    /**
+     * hdfsHSync - Similar to POSIX fsync: flush out the data in the client's
+     * user buffer all the way to the disk device (though the disk may still
+     * keep it in its cache).
+     * @param fs configured filesystem handle
+     * @param file file handle
+     * @return 0 on success, -1 on error and sets errno
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsHSync(hdfsFS fs, hdfsFile file);
+
+
+    /**
+     * hdfsAvailable - Number of bytes that can be read from this
+     * input stream without blocking.
+     * @param fs The configured filesystem handle.
+     * @param file The file handle.
+     * @return Returns available bytes; -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsAvailable(hdfsFS fs, hdfsFile file);
+
+
+    /**
+     * hdfsCopy - Copy file from one filesystem to another.
+     * @param srcFS The handle to source filesystem.
+     * @param src The path of source file.
+     * @param dstFS The handle to destination filesystem.
+     * @param dst The path of destination file.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+    /**
+     * hdfsMove - Move file from one filesystem to another.
+     * @param srcFS The handle to source filesystem.
+     * @param src The path of source file.
+     * @param dstFS The handle to destination filesystem.
+     * @param dst The path of destination file.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+    /**
+     * hdfsDelete - Delete file.
+     * @param fs The configured filesystem handle.
+     * @param path The path of the file.
+     * @param recursive If path is a directory and recursive is set to
+     * non-zero, the directory and its contents are deleted; otherwise the
+     * call fails.  For a file, the recursive argument is ignored.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsDelete(hdfsFS fs, const char* path, int recursive);
+
+    /**
+     * hdfsRename - Rename file.
+     * @param fs The configured filesystem handle.
+     * @param oldPath The path of the source file.
+     * @param newPath The path of the destination file.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);
+
+
+    /**
+     * hdfsGetWorkingDirectory - Get the current working directory for
+     * the given filesystem.
+     * @param fs The configured filesystem handle.
+     * @param buffer The user-buffer to copy path of cwd into.
+     * @param bufferSize The length of user-buffer.
+     * @return Returns buffer, NULL on error.
+     */
+    LIBHDFS_EXTERNAL
+    char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);
+
+
+    /**
+     * hdfsSetWorkingDirectory - Set the working directory. All relative
+     * paths will be resolved relative to it.
+     * @param fs The configured filesystem handle.
+     * @param path The path of the new 'cwd'.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);
+
+
+    /**
+     * hdfsCreateDirectory - Make the given file and all non-existent
+     * parents into directories.
+     * @param fs The configured filesystem handle.
+     * @param path The path of the directory.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsCreateDirectory(hdfsFS fs, const char* path);
+
+
+    /**
+     * hdfsSetReplication - Set the replication of the specified
+     * file to the supplied value
+     * @param fs The configured filesystem handle.
+     * @param path The path of the file.
+     * @return Returns 0 on success, -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);
+
+
+    /**
+     * hdfsFileInfo - Information about a file/directory.
+     */
+    typedef struct  {
+        tObjectKind mKind;   /* file or directory */
+        char *mName;         /* the name of the file */
+        tTime mLastMod;      /* the last modification time for the file in seconds */
+        tOffset mSize;       /* the size of the file in bytes */
+        short mReplication;    /* the count of replicas */
+        tOffset mBlockSize;  /* the block size for the file */
+        char *mOwner;        /* the owner of the file */
+        char *mGroup;        /* the group associated with the file */
+        short mPermissions;  /* the permissions associated with the file */
+        tTime mLastAccess;    /* the last access time for the file in seconds */
+    } hdfsFileInfo;
+
+
+    /**
+     * hdfsListDirectory - Get list of files/directories for a given
+     * directory-path. hdfsFreeFileInfo should be called to deallocate memory.
+     * @param fs The configured filesystem handle.
+     * @param path The path of the directory.
+     * @param numEntries Set to the number of files/directories in path.
+     * @return Returns a dynamically-allocated array of hdfsFileInfo
+     * objects; NULL on error or empty directory.
+     * errno is set to non-zero on error or zero on success.
+     */
+    LIBHDFS_EXTERNAL
+    hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,
+                                    int *numEntries);
+
+
+    /**
+     * hdfsGetPathInfo - Get information about a path as a (dynamically
+     * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be
+     * called when the pointer is no longer needed.
+     * @param fs The configured filesystem handle.
+     * @param path The path of the file.
+     * @return Returns a dynamically-allocated hdfsFileInfo object;
+     * NULL on error.
+     */
+    LIBHDFS_EXTERNAL
+    hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);
+
+
+    /**
+     * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields)
+     * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
+     * objects.
+     * @param numEntries The size of the array.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);
+
+    /**
+     * hdfsFileIsEncrypted: determine if a file is encrypted based on its
+     * hdfsFileInfo.
+     * @return -1 if there was an error (errno will be set), 0 if the file is
+     *         not encrypted, 1 if the file is encrypted.
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsFileIsEncrypted(hdfsFileInfo *hdfsFileInfo);
+
+
+    /**
+     * hdfsGetHosts - Get hostnames where a particular block (determined by
+     * pos & blocksize) of a file is stored. The last element in the array
+     * is NULL. Due to replication, a single block could be present on
+     * multiple hosts.
+     * @param fs The configured filesystem handle.
+     * @param path The path of the file.
+     * @param start The start of the block.
+     * @param length The length of the block.
+     * @return Returns a dynamically-allocated 2-d array of blocks-hosts;
+     * NULL on error.
+     */
+    LIBHDFS_EXTERNAL
+    char*** hdfsGetHosts(hdfsFS fs, const char* path,
+            tOffset start, tOffset length);
+
+
+    /**
+     * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts
+     * @param blockHosts The array of blocks-hosts returned by hdfsGetHosts.
+     */
+    LIBHDFS_EXTERNAL
+    void hdfsFreeHosts(char ***blockHosts);
+
+
+    /**
+     * hdfsGetDefaultBlockSize - Get the default blocksize.
+     *
+     * @param fs            The configured filesystem handle.
+     * @deprecated          Use hdfsGetDefaultBlockSizeAtPath instead.
+     *
+     * @return              Returns the default blocksize, or -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    tOffset hdfsGetDefaultBlockSize(hdfsFS fs);
+
+
+    /**
+     * hdfsGetDefaultBlockSizeAtPath - Get the default blocksize at the
+     * filesystem indicated by a given path.
+     *
+     * @param fs            The configured filesystem handle.
+     * @param path          The given path will be used to locate the actual
+     *                      filesystem.  The full path does not have to exist.
+     *
+     * @return              Returns the default blocksize, or -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    tOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path);
+
+
+    /**
+     * hdfsGetCapacity - Return the raw capacity of the filesystem.
+     * @param fs The configured filesystem handle.
+     * @return Returns the raw-capacity; -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    tOffset hdfsGetCapacity(hdfsFS fs);
+
+
+    /**
+     * hdfsGetUsed - Return the total raw size of all files in the filesystem.
+     * @param fs The configured filesystem handle.
+     * @return Returns the total-size; -1 on error.
+     */
+    LIBHDFS_EXTERNAL
+    tOffset hdfsGetUsed(hdfsFS fs);
+
+    /**
+     * Change the user and/or group of a file or directory.
+     *
+     * @param fs            The configured filesystem handle.
+     * @param path          the path to the file or directory
+     * @param owner         User string.  Set to NULL for 'no change'
+     * @param group         Group string.  Set to NULL for 'no change'
+     * @return              0 on success else -1
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsChown(hdfsFS fs, const char* path, const char *owner,
+                  const char *group);
+
+    /**
+     * hdfsChmod
+     * @param fs The configured filesystem handle.
+     * @param path the path to the file or directory
+     * @param mode the bitmask to set it to
+     * @return 0 on success else -1
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsChmod(hdfsFS fs, const char* path, short mode);
+
+    /**
+     * hdfsUtime
+     * @param fs The configured filesystem handle.
+     * @param path the path to the file or directory
+     * @param mtime new modification time or -1 for no change
+     * @param atime new access time or -1 for no change
+     * @return 0 on success else -1
+     */
+    LIBHDFS_EXTERNAL
+    int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime);
+
+    /**
+     * Allocate a zero-copy options structure.
+     *
+     * You must free all options structures allocated with this function using
+     * hadoopRzOptionsFree.
+     *
+     * @return            A zero-copy options structure, or NULL if one could
+     *                    not be allocated.  If NULL is returned, errno will
+     *                    contain the error number.
+     */
+    LIBHDFS_EXTERNAL
+    struct hadoopRzOptions *hadoopRzOptionsAlloc(void);
+
+    /**
+     * Determine whether we should skip checksums in read0.
+     *
+     * @param opts        The options structure.
+     * @param skip        Nonzero to skip checksums sometimes; zero to always
+     *                    check them.
+     *
+     * @return            0 on success; -1 plus errno on failure.
+     */
+    LIBHDFS_EXTERNAL
+    int hadoopRzOptionsSetSkipChecksum(
+            struct hadoopRzOptions *opts, int skip);
+
+    /**
+     * Set the ByteBufferPool to use with read0.
+     *
+     * @param opts        The options structure.
+     * @param className   If this is NULL, we will not use any
+     *                    ByteBufferPool.  If this is non-NULL, it will be
+     *                    treated as the name of the pool class to use.
+     *                    For example, you can use
+     *                    ELASTIC_BYTE_BUFFER_POOL_CLASS.
+     *
+     * @return            0 if the ByteBufferPool class was found and
+     *                    instantiated;
+     *                    -1 plus errno otherwise.
+     */
+    LIBHDFS_EXTERNAL
+    int hadoopRzOptionsSetByteBufferPool(
+            struct hadoopRzOptions *opts, const char *className);
+
+    /**
+     * Free a hadoopRzOptions structure.
+     *
+     * @param opts        The options structure to free.
+     *                    Any associated ByteBufferPool will also be freed.
+     */
+    LIBHDFS_EXTERNAL
+    void hadoopRzOptionsFree(struct hadoopRzOptions *opts);
+
+    /**
+     * Perform a byte buffer read.
+     * If possible, this will be a zero-copy (mmap) read.
+     *
+     * @param file       The file to read from.
+     * @param opts       An options structure created by hadoopRzOptionsAlloc.
+     * @param maxLength  The maximum length to read.  We may read fewer bytes
+     *                   than this length.
+     *
+     * @return           On success, we will return a new hadoopRzBuffer.
+     *                   This buffer will continue to be valid and readable
+     *                   until it is released by hadoopRzBufferFree.  Failure to
+     *                   release a buffer will lead to a memory leak.
+     *                   You can access the data within the hadoopRzBuffer with
+     *                   hadoopRzBufferGet.  If you have reached EOF, the data
+     *                   within the hadoopRzBuffer will be NULL.  You must still
+     *                   free hadoopRzBuffer instances containing NULL.
+     *
+     *                   On failure, we will return NULL plus an errno code.
+     *                   errno = EOPNOTSUPP indicates that we could not do a
+     *                   zero-copy read, and there was no ByteBufferPool
+     *                   supplied.
+     */
+    LIBHDFS_EXTERNAL
+    struct hadoopRzBuffer* hadoopReadZero(hdfsFile file,
+            struct hadoopRzOptions *opts, int32_t maxLength);
+
+    /**
+     * Determine the length of the buffer returned from readZero.
+     *
+     * @param buffer     a buffer returned from readZero.
+     * @return           the length of the buffer.
+     */
+    LIBHDFS_EXTERNAL
+    int32_t hadoopRzBufferLength(const struct hadoopRzBuffer *buffer);
+
+    /**
+     * Get a pointer to the raw buffer returned from readZero.
+     *
+     * To find out how many bytes this buffer contains, call
+     * hadoopRzBufferLength.
+     *
+     * @param buffer     a buffer returned from readZero.
+     * @return           a pointer to the start of the buffer.  This will be
+     *                   NULL when end-of-file has been reached.
+     */
+    LIBHDFS_EXTERNAL
+    const void *hadoopRzBufferGet(const struct hadoopRzBuffer *buffer);
+
+    /**
+     * Release a buffer obtained through readZero.
+     *
+     * @param file       The hdfs stream that created this buffer.  This must be
+     *                   the same stream you called hadoopReadZero on.
+     * @param buffer     The buffer to release.
+     */
+    LIBHDFS_EXTERNAL
+    void hadoopRzBufferFree(hdfsFile file, struct hadoopRzBuffer *buffer);
+
+#ifdef __cplusplus
+}
+#endif
+
+#undef LIBHDFS_EXTERNAL
+#endif /*LIBHDFS_HDFS_H*/
+
+/**
+ * vim: ts=4: sw=4: et
+ */
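
For reference, here is a minimal sketch (not part of the patch) of how this vendored libhdfs API is typically driven through the builder interface; the "default" NameNode, the /tmp/example.dat path, and the buffer size are placeholders, and error handling is reduced to early returns:

    #include <stdio.h>
    #include "hdfs.h"   /* the header added above */

    int main(void) {
        /* Configure and connect; hdfsBuilderConnect frees the builder. */
        struct hdfsBuilder* bld = hdfsNewBuilder();
        hdfsBuilderSetNameNode(bld, "default");
        hdfsFS fs = hdfsBuilderConnect(bld);
        if (fs == NULL) return 1;

        /* Open for reading with default buffer size, replication, block size. */
        hdfsFile f = hdfsOpenFile(fs, "/tmp/example.dat", O_RDONLY, 0, 0, 0);
        if (f == NULL) { hdfsDisconnect(fs); return 1; }

        char buf[4096];
        tSize n = hdfsRead(fs, f, buf, (tSize) sizeof(buf));
        printf("read %d bytes\n", (int) n);

        hdfsCloseFile(fs, f);
        hdfsDisconnect(fs);  /* every successful connect gets a disconnect */
        return 0;
    }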

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/dev/merge_arrow_pr.py
----------------------------------------------------------------------
diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py
index 981779f..8f47f93 100755
--- a/dev/merge_arrow_pr.py
+++ b/dev/merge_arrow_pr.py
@@ -173,7 +173,10 @@ def merge_pr(pr_num, target_ref):
     for c in commits:
         merge_message_flags += ["-m", c]
 
-    run_cmd(['git', 'commit', '--author="%s"' % primary_author] + merge_message_flags)
+    run_cmd(['git', 'commit',
+             '--no-verify',  # do not run commit hooks
+             '--author="%s"' % primary_author] +
+            merge_message_flags)
 
     continue_maybe("Merge complete (local ref %s). Push to %s?" % (
         target_branch_name, PUSH_REMOTE_NAME))

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index f1becfc..fdbfce9 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -348,8 +348,10 @@ find_package(Arrow REQUIRED)
 include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
 ADD_THIRDPARTY_LIB(arrow
   SHARED_LIB ${ARROW_SHARED_LIB})
+ADD_THIRDPARTY_LIB(arrow_io
+  SHARED_LIB ${ARROW_IO_SHARED_LIB})
 ADD_THIRDPARTY_LIB(arrow_parquet
-    SHARED_LIB ${ARROW_PARQUET_SHARED_LIB})
+  SHARED_LIB ${ARROW_PARQUET_SHARED_LIB})
 
 ############################################################
 # Linker setup
@@ -428,6 +430,7 @@ set(PYARROW_SRCS
 
 set(LINK_LIBS
   arrow
+  arrow_io
   arrow_parquet
 )
 
@@ -449,6 +452,7 @@ set(CYTHON_EXTENSIONS
   array
   config
   error
+  io
   parquet
   scalar
   schema

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/cmake_modules/FindArrow.cmake
----------------------------------------------------------------------
diff --git a/python/cmake_modules/FindArrow.cmake b/python/cmake_modules/FindArrow.cmake
index f0b258e..6bd3056 100644
--- a/python/cmake_modules/FindArrow.cmake
+++ b/python/cmake_modules/FindArrow.cmake
@@ -47,13 +47,24 @@ find_library(ARROW_PARQUET_LIB_PATH NAMES arrow_parquet
   ${ARROW_SEARCH_LIB_PATH}
   NO_DEFAULT_PATH)
 
+find_library(ARROW_IO_LIB_PATH NAMES arrow_io
+  PATHS
+  ${ARROW_SEARCH_LIB_PATH}
+  NO_DEFAULT_PATH)
+
 if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH AND ARROW_PARQUET_LIB_PATH)
   set(ARROW_FOUND TRUE)
   set(ARROW_LIB_NAME libarrow)
+  set(ARROW_IO_LIB_NAME libarrow_io)
   set(ARROW_PARQUET_LIB_NAME libarrow_parquet)
+
   set(ARROW_LIBS ${ARROW_SEARCH_LIB_PATH})
   set(ARROW_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_LIB_NAME}.a)
   set(ARROW_SHARED_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+
+  set(ARROW_IO_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_IO_LIB_NAME}.a)
+  set(ARROW_IO_SHARED_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+
   set(ARROW_PARQUET_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_PARQUET_LIB_NAME}.a)
   set(ARROW_PARQUET_SHARED_LIB ${ARROW_LIBS}/${ARROW_PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
 else ()
@@ -62,7 +73,9 @@ endif ()
 
 if (ARROW_FOUND)
   if (NOT Arrow_FIND_QUIETLY)
-    message(STATUS "Found the Arrow library: ${ARROW_LIB_PATH}, ${ARROW_PARQUET_LIB_PATH}")
+    message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}")
+    message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}")
+    message(STATUS "Found the Arrow Parquet library: ${ARROW_PARQUET_LIB_PATH}")
   endif ()
 else ()
   if (NOT Arrow_FIND_QUIETLY)
@@ -82,6 +95,8 @@ mark_as_advanced(
   ARROW_LIBS
   ARROW_STATIC_LIB
   ARROW_SHARED_LIB
+  ARROW_IO_STATIC_LIB
+  ARROW_IO_SHARED_LIB
   ARROW_PARQUET_STATIC_LIB
   ARROW_PARQUET_SHARED_LIB
 )

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/conda.recipe/meta.yaml
----------------------------------------------------------------------
diff --git a/python/conda.recipe/meta.yaml b/python/conda.recipe/meta.yaml
index 85d24b6..98ae414 100644
--- a/python/conda.recipe/meta.yaml
+++ b/python/conda.recipe/meta.yaml
@@ -26,6 +26,7 @@ requirements:
 
   run:
     - arrow-cpp
+    - parquet-cpp
     - python
     - numpy
     - pandas

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pxd b/python/pyarrow/error.pxd
index 97ba0ef..1fb6fad 100644
--- a/python/pyarrow/error.pxd
+++ b/python/pyarrow/error.pxd
@@ -18,5 +18,5 @@
 from pyarrow.includes.libarrow cimport CStatus
 from pyarrow.includes.pyarrow cimport *
 
-cdef check_cstatus(const CStatus& status)
-cdef check_status(const Status& status)
+cdef int check_cstatus(const CStatus& status) nogil except -1
+cdef int check_status(const Status& status) nogil except -1

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx
index 5a6a038..2440193 100644
--- a/python/pyarrow/error.pyx
+++ b/python/pyarrow/error.pyx
@@ -22,16 +22,18 @@ from pyarrow.compat import frombytes
 class ArrowException(Exception):
     pass
 
-cdef check_cstatus(const CStatus& status):
+cdef int check_cstatus(const CStatus& status) nogil except -1:
     if status.ok():
-        return
+        return 0
 
     cdef c_string c_message = status.ToString()
-    raise ArrowException(frombytes(c_message))
+    with gil:
+        raise ArrowException(frombytes(c_message))
 
-cdef check_status(const Status& status):
+cdef int check_status(const Status& status) nogil except -1:
     if status.ok():
-        return
+        return 0
 
     cdef c_string c_message = status.ToString()
-    raise ArrowException(frombytes(c_message))
+    with gil:
+        raise ArrowException(frombytes(c_message))

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index 1f6ecee..133797b 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -33,3 +33,21 @@ cdef extern from "<iostream>":
 cdef extern from "<Python.h>":
     void Py_XDECREF(PyObject* o)
 
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+    # We can later add more of the common status factory methods as needed
+    cdef CStatus CStatus_OK "Status::OK"()
+
+    cdef cppclass CStatus "arrow::Status":
+        CStatus()
+
+        c_string ToString()
+
+        c_bool ok()
+        c_bool IsOutOfMemory()
+        c_bool IsKeyError()
+        c_bool IsNotImplemented()
+        c_bool IsInvalid()
+
+    cdef cppclass Buffer:
+        uint8_t* data()
+        int64_t size()

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 90414e3..91ce069 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -20,25 +20,6 @@
 from pyarrow.includes.common cimport *
 
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
-    # We can later add more of the common status factory methods as needed
-    cdef CStatus CStatus_OK "Status::OK"()
-
-    cdef cppclass CStatus "arrow::Status":
-        CStatus()
-
-        c_string ToString()
-
-        c_bool ok()
-        c_bool IsOutOfMemory()
-        c_bool IsKeyError()
-        c_bool IsNotImplemented()
-        c_bool IsInvalid()
-
-    cdef cppclass Buffer:
-        uint8_t* data()
-        int64_t size()
-
-cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     enum Type" arrow::Type::type":
         Type_NA" arrow::Type::NA"

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/includes/libarrow_io.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow_io.pxd b/python/pyarrow/includes/libarrow_io.pxd
new file mode 100644
index 0000000..d874ba3
--- /dev/null
+++ b/python/pyarrow/includes/libarrow_io.pxd
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+cdef extern from "arrow/io/interfaces.h" nogil:
+    enum ObjectType" arrow::io::ObjectType::type":
+        ObjectType_FILE" arrow::io::ObjectType::FILE"
+        ObjectType_DIRECTORY" arrow::io::ObjectType::DIRECTORY"
+
+cdef extern from "arrow/io/hdfs.h" namespace "arrow::io" nogil:
+    CStatus ConnectLibHdfs()
+
+    cdef cppclass HdfsConnectionConfig:
+        c_string host
+        int port
+        c_string user
+
+    cdef cppclass HdfsPathInfo:
+        ObjectType kind
+        c_string name
+        c_string owner
+        c_string group
+        int32_t last_modified_time
+        int32_t last_access_time
+        int64_t size
+        int16_t replication
+        int64_t block_size
+        int16_t permissions
+
+    cdef cppclass CHdfsFile:
+        CStatus Close()
+        CStatus Seek(int64_t position)
+        CStatus Tell(int64_t* position)
+
+    cdef cppclass HdfsReadableFile(CHdfsFile):
+        CStatus GetSize(int64_t* size)
+        CStatus Read(int32_t nbytes, int32_t* bytes_read,
+                     uint8_t* buffer)
+
+        CStatus ReadAt(int64_t position, int32_t nbytes,
+                       int32_t* bytes_read, uint8_t* buffer)
+
+    cdef cppclass HdfsWriteableFile(CHdfsFile):
+        CStatus Write(const uint8_t* buffer, int32_t nbytes)
+
+        CStatus Write(const uint8_t* buffer, int32_t nbytes,
+                      int32_t* bytes_written)
+
+    cdef cppclass CHdfsClient" arrow::io::HdfsClient":
+        @staticmethod
+        CStatus Connect(const HdfsConnectionConfig* config,
+                        shared_ptr[CHdfsClient]* client)
+
+        CStatus CreateDirectory(const c_string& path)
+
+        CStatus Delete(const c_string& path, c_bool recursive)
+
+        CStatus Disconnect()
+
+        c_bool Exists(const c_string& path)
+
+        CStatus GetCapacity(int64_t* nbytes)
+        CStatus GetUsed(int64_t* nbytes)
+
+        CStatus ListDirectory(const c_string& path,
+                              vector[HdfsPathInfo]* listing)
+
+        CStatus Rename(const c_string& src, const c_string& dst)
+
+        CStatus OpenReadable(const c_string& path,
+                             shared_ptr[HdfsReadableFile]* handle)
+
+        CStatus OpenWriteable(const c_string& path, c_bool append,
+                              int32_t buffer_size, int16_t replication,
+                              int64_t default_block_size,
+                              shared_ptr[HdfsWriteableFile]* handle)
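
The declarations above mirror the C++ classes in arrow/io/hdfs.h that this prototype wraps. For orientation only, a rough C++ sketch of the same connect-and-read flow follows; it is not part of the patch, the exact C++ signatures are inferred from the Cython declarations above, and the host, port, user, and path values are placeholders:

    #include <cstdint>
    #include <memory>
    #include "arrow/io/hdfs.h"

    // Connect, read the first bytes of a file, then disconnect.
    arrow::Status ReadHead(int64_t* bytes_read_out) {
      arrow::io::HdfsConnectionConfig conf;
      conf.host = "localhost";   // placeholder NameNode host
      conf.port = 8020;          // placeholder NameNode port
      conf.user = "hadoop";      // placeholder user name

      std::shared_ptr<arrow::io::HdfsClient> client;
      arrow::Status st = arrow::io::HdfsClient::Connect(&conf, &client);
      if (!st.ok()) return st;

      std::shared_ptr<arrow::io::HdfsReadableFile> file;
      st = client->OpenReadable("/tmp/example.dat", &file);  // placeholder path
      if (!st.ok()) return st;

      uint8_t buffer[4096];
      int32_t bytes_read = 0;
      st = file->Read(static_cast<int32_t>(sizeof(buffer)), &bytes_read, buffer);
      if (!st.ok()) return st;

      *bytes_read_out = bytes_read;
      return client->Disconnect();
    }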

http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
new file mode 100644
index 0000000..8b97671
--- /dev/null
+++ b/python/pyarrow/io.pyx
@@ -0,0 +1,504 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Cython wrappers for IO interfaces defined in arrow/io
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from libc.stdlib cimport malloc, free
+
+from pyarrow.includes.libarrow cimport *
+cimport pyarrow.includes.pyarrow as pyarrow
+from pyarrow.includes.libarrow_io cimport *
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.error cimport check_cstatus
+
+cimport cpython as cp
+
+import re
+import sys
+import threading
+
+_HDFS_PATH_RE = re.compile(r'hdfs://(.*):(\d+)(.*)')
+
+try:
+    # Python 3
+    from queue import Queue, Empty as QueueEmpty, Full as QueueFull
+except ImportError:
+    from Queue import Queue, Empty as QueueEmpty, Full as QueueFull
+
+
+def have_libhdfs():
+    try:
+        check_cstatus(ConnectLibHdfs())
+        return True
+    except Exception:
+        return False
+
+
+def strip_hdfs_abspath(path):
+    m = _HDFS_PATH_RE.match(path)
+    if m:
+        return m.group(3)
+    else:
+        return path
+
+
+cdef class HdfsClient:
+    cdef:
+        shared_ptr[CHdfsClient] client
+
+    cdef readonly:
+        object host
+        int port
+        object user
+        bint is_open
+
+    def __cinit__(self):
+        self.is_open = False
+
+    def __dealloc__(self):
+        if self.is_open:
+            self.close()
+
+    def close(self):
+        self._ensure_client()
+        with nogil:
+            check_cstatus(self.client.get().Disconnect())
+        self.is_open = False
+
+    cdef _ensure_client(self):
+        if self.client.get() == NULL:
+            raise IOError('HDFS client improperly initialized')
+        elif not self.is_open:
+            raise IOError('HDFS client is closed')
+
+    @classmethod
+    def connect(cls, host, port, user):
+        """
+
+        Parameters
+        ----------
+        host :
+        port :
+        user :
+
+        Notes
+        -----
+        The first time you call this method, it will take longer than usual due
+        to JNI spin-up time.
+
+        Returns
+        -------
+        client : HdfsClient
+        """
+        cdef:
+            HdfsClient out = HdfsClient()
+            HdfsConnectionConfig conf
+
+        conf.host = tobytes(host)
+        conf.port = port
+        conf.user = tobytes(user)
+
+        with nogil:
+            check_cstatus(
+                CHdfsClient.Connect(&conf, &out.client))
+        out.is_open = True
+
+        return out
+
+    def exists(self, path):
+        """
+        Returns True if the path is known to the cluster, False if it is not
+        (or if there is an RPC error)
+        """
+        self._ensure_client()
+
+        cdef c_string c_path = tobytes(path)
+        cdef c_bool result
+        with nogil:
+            result = self.client.get().Exists(c_path)
+        return result
+
+    def ls(self, path, bint full_info=True):
+        """
+        Retrieve directory contents and metadata, if requested.
+
+        Parameters
+        ----------
+        path : HDFS path
+        full_info : boolean, default True
+            If False, only return list of paths
+
+        Returns
+        -------
+        result : list of dicts (full_info=True) or strings (full_info=False)
+        """
+        cdef:
+            c_string c_path = tobytes(path)
+            vector[HdfsPathInfo] listing
+            list results = []
+            int i
+
+        self._ensure_client()
+
+        with nogil:
+            check_cstatus(self.client.get()
+                          .ListDirectory(c_path, &listing))
+
+        cdef const HdfsPathInfo* info
+        for i in range(listing.size()):
+            info = &listing[i]
+
+            # Try to trim off the hdfs://HOST:PORT piece
+            name = strip_hdfs_abspath(frombytes(info.name))
+
+            if full_info:
+                kind = ('file' if info.kind == ObjectType_FILE
+                        else 'directory')
+
+                results.append({
+                    'kind': kind,
+                    'name': name,
+                    'owner': frombytes(info.owner),
+                    'group': frombytes(info.group),
+                    'last_modified_time': info.last_modified_time,
+                    'last_access_time': info.last_access_time,
+                    'size': info.size,
+                    'replication': info.replication,
+                    'block_size': info.block_size,
+                    'permissions': info.permissions
+                })
+            else:
+                results.append(name)
+
+        return results
+
+    def mkdir(self, path):
+        """
+        Create indicated directory and any necessary parent directories
+        """
+        self._ensure_client()
+
+        cdef c_string c_path = tobytes(path)
+        with nogil:
+            check_cstatus(self.client.get()
+                          .CreateDirectory(c_path))
+
+    def delete(self, path, bint recursive=False):
+        """
+        Delete the indicated file or directory
+
+        Parameters
+        ----------
+        path : string
+        recursive : boolean, default False
+            If True, also delete child paths for directories
+        """
+        self._ensure_client()
+
+        cdef c_string c_path = tobytes(path)
+        with nogil:
+            check_cstatus(self.client.get()
+                          .Delete(c_path, recursive))
+
+    def open(self, path, mode='rb', buffer_size=None, replication=None,
+             default_block_size=None):
+        """
+        Parameters
+        ----------
+        mode : string, 'rb', 'wb', 'ab'
+        """
+        self._ensure_client()
+
+        cdef HdfsFile out = HdfsFile()
+
+        if mode not in ('rb', 'wb', 'ab'):
+            raise Exception("Mode must be 'rb' (read), "
+                            "'wb' (write, new file), or 'ab' (append)")
+
+        cdef c_string c_path = tobytes(path)
+        cdef c_bool append = False
+
+        # 0 in libhdfs means "use the default"
+        cdef int32_t c_buffer_size = buffer_size or 0
+        cdef int16_t c_replication = replication or 0
+        cdef int64_t c_default_block_size = default_block_size or 0
+
+        if mode in ('wb', 'ab'):
+            if mode == 'ab':
+                append = True
+
+            with nogil:
+                check_cstatus(
+                    self.client.get()
+                    .OpenWriteable(c_path, append, c_buffer_size,
+                                   c_replication, c_default_block_size,
+                                   &out.wr_file))
+
+            out.is_readonly = False
+        else:
+            with nogil:
+                check_cstatus(self.client.get()
+                              .OpenReadable(c_path, &out.rd_file))
+            out.is_readonly = True
+
+        if c_buffer_size == 0:
+            c_buffer_size = 2 ** 16
+
+        out.mode = mode
+        out.buffer_size = c_buffer_size
+        out.parent = self
+        out.is_open = True
+
+        return out
+
+    def upload(self, path, stream, buffer_size=2**16):
+        """
+        Upload a file-like object to the given HDFS path
+        """
+        write_queue = Queue(50)
+
+        f = self.open(path, 'wb')
+
+        done = False
+        exc_info = []
+        def bg_write():
+            try:
+                while not done or write_queue.qsize() > 0:
+                    try:
+                        buf = write_queue.get(timeout=0.01)
+                    except QueueEmpty:
+                        continue
+
+                    f.write(buf)
+
+            except Exception:
+                # Store the error in a mutable container; a plain assignment
+                # here would only rebind a local inside this closure
+                exc_info.append(sys.exc_info())
+
+        writer_thread = threading.Thread(target=bg_write)
+        writer_thread.start()
+
+        try:
+            while True:
+                buf = stream.read(buffer_size)
+                if not buf:
+                    break
+
+                write_queue.put_nowait(buf)
+        finally:
+            done = True
+
+        writer_thread.join()
+        if exc_info:
+            exc_type, exc_value, exc_tb = exc_info[0]
+            raise exc_type, exc_value, exc_tb
+
+    def download(self, path, stream, buffer_size=None):
+        f = self.open(path, 'rb', buffer_size=buffer_size)
+        f.download(stream)
+
+
+cdef class HdfsFile:
+    cdef:
+        shared_ptr[HdfsReadableFile] rd_file
+        shared_ptr[HdfsWriteableFile] wr_file
+        bint is_readonly
+        bint is_open
+        object parent
+
+    cdef readonly:
+        int32_t buffer_size
+        object mode
+
+    def __cinit__(self):
+        self.is_open = False
+
+    def __dealloc__(self):
+        if self.is_open:
+            self.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, tb):
+        self.close()
+
+    def close(self):
+        if self.is_open:
+            with nogil:
+                if self.is_readonly:
+                    check_cstatus(self.rd_file.get().Close())
+                else:
+                    check_cstatus(self.wr_file.get().Close())
+        self.is_open = False
+
+    cdef _assert_readable(self):
+        if not self.is_readonly:
+            raise IOError("only valid on readonly files")
+
+    cdef _assert_writeable(self):
+        if self.is_readonly:
+            raise IOError("only valid on writeonly files")
+
+    def size(self):
+        cdef int64_t size
+        self._assert_readable()
+        with nogil:
+            check_cstatus(self.rd_file.get().GetSize(&size))
+        return size
+
+    def tell(self):
+        cdef int64_t position
+        with nogil:
+            if self.is_readonly:
+                check_cstatus(self.rd_file.get().Tell(&position))
+            else:
+                check_cstatus(self.wr_file.get().Tell(&position))
+        return position
+
+    def seek(self, int64_t position):
+        self._assert_readable()
+        with nogil:
+            check_cstatus(self.rd_file.get().Seek(position))
+
+    def read(self, int nbytes):
+        """
+        Read the indicated number of bytes from the file, stopping at EOF
+        """
+        cdef:
+            int32_t bytes_read = 0
+            uint8_t* buf
+
+        self._assert_readable()
+
+        # This isn't ideal -- PyBytes_FromStringAndSize copies the data from
+        # the passed buffer, so it's hard for us to avoid doubling the memory
+        buf = <uint8_t*> malloc(nbytes)
+        if buf == NULL:
+            raise MemoryError("Failed to allocate {0} bytes".format(nbytes))
+
+        cdef int32_t total_bytes = 0
+
+        cdef int rpc_chunksize = min(self.buffer_size, nbytes)
+        cdef int this_chunksize
+
+        try:
+            with nogil:
+                while total_bytes < nbytes:
+                    # Never request more bytes than remain in the output
+                    # buffer, otherwise the final Read call could overrun it
+                    this_chunksize = rpc_chunksize
+                    if nbytes - total_bytes < this_chunksize:
+                        this_chunksize = nbytes - total_bytes
+
+                    check_cstatus(self.rd_file.get()
+                                  .Read(this_chunksize, &bytes_read,
+                                        buf + total_bytes))
+
+                    total_bytes += bytes_read
+
+                    # EOF
+                    if bytes_read == 0:
+                        break
+            result = cp.PyBytes_FromStringAndSize(<const char*>buf,
+                                                  total_bytes)
+        finally:
+            free(buf)
+
+        return result
+
+    def download(self, stream_or_path):
+        """
+        Stream the entire file to a local path or file-like object, rather
+        than reading it completely into memory. Seeks to the beginning of
+        the file first.
+        """
+        cdef:
+            int32_t bytes_read = 0
+            uint8_t* buf
+        self._assert_readable()
+
+        write_queue = Queue(50)
+
+        if not hasattr(stream_or_path, 'write'):
+            stream = open(stream_or_path, 'wb')
+            cleanup = lambda: stream.close()
+        else:
+            stream = stream_or_path
+            cleanup = lambda: None
+
+        done = False
+        exc_info = []
+        def bg_write():
+            try:
+                while not done or write_queue.qsize() > 0:
+                    try:
+                        chunk = write_queue.get(timeout=0.01)
+                    except QueueEmpty:
+                        continue
+                    stream.write(chunk)
+            except Exception:
+                # Store the error in a mutable container; a plain assignment
+                # here would only rebind a local inside this closure
+                exc_info.append(sys.exc_info())
+            finally:
+                cleanup()
+
+        self.seek(0)
+
+        writer_thread = threading.Thread(target=bg_write)
+
+        # This isn't ideal -- PyBytes_FromStringAndSize copies the data from
+        # the passed buffer, so it's hard for us to avoid doubling the memory
+        buf = <uint8_t*> malloc(self.buffer_size)
+        if buf == NULL:
+            raise MemoryError("Failed to allocate {0} bytes"
+                              .format(self.buffer_size))
+
+        writer_thread.start()
+
+        cdef int64_t total_bytes = 0
+
+        try:
+            while True:
+                with nogil:
+                    check_cstatus(self.rd_file.get()
+                                  .Read(self.buffer_size, &bytes_read, buf))
+
+                total_bytes += bytes_read
+
+                # EOF
+                if bytes_read == 0:
+                    break
+
+                pybuf = cp.PyBytes_FromStringAndSize(<const char*>buf,
+                                                     bytes_read)
+
+                write_queue.put_nowait(pybuf)
+        finally:
+            free(buf)
+            done = True
+
+        writer_thread.join()
+        if exc_info:
+            exc_type, exc_value, exc_tb = exc_info[0]
+            raise exc_type, exc_value, exc_tb
+
+    def write(self, data):
+        """
+        Write a bytes-like object to the file; unicode input is encoded as
+        UTF-8
+        """
+        self._assert_writeable()
+
+        data = tobytes(data)
+
+        cdef const uint8_t* buf = <const uint8_t*> cp.PyBytes_AS_STRING(data)
+        cdef int32_t bufsize = len(data)
+        with nogil:
+            check_cstatus(self.wr_file.get().Write(buf, bufsize))

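For orientation, here is a minimal usage sketch of the HdfsClient/HdfsFile API added above. It is not part of the commit; the host, port, user, and paths are placeholder values, and the module is assumed to be importable as pyarrow.io, as in the tests below.

    from io import BytesIO
    import pyarrow.io as io

    # Connect to a namenode; host/port/user here are placeholders
    client = io.HdfsClient.connect('localhost', 20500, 'hadoop')

    # Create a directory and write a small file
    client.mkdir('/tmp/pyarrow-example')
    with client.open('/tmp/pyarrow-example/data.bin', 'wb') as f:
        f.write(b'some bytes')

    # List the directory; full_info=False returns plain path names
    print(client.ls('/tmp/pyarrow-example', False))

    # Stream the file back into a local buffer
    buf = BytesIO()
    client.download('/tmp/pyarrow-example/data.bin', buf)
    assert buf.getvalue() == b'some bytes'

Note that upload and download stream chunks through a bounded Queue drained by a background thread, so the HDFS RPCs and the local file I/O can overlap.
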
http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index bf5a220..86147f8 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -15,25 +15,24 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from pyarrow.compat import unittest
 import pyarrow
 import pyarrow.formatting as fmt
 
 
-class TestArrayAPI(unittest.TestCase):
+def test_repr_on_pre_init_array():
+    arr = pyarrow.array.Array()
+    assert len(repr(arr)) > 0
 
-    def test_repr_on_pre_init_array(self):
-        arr = pyarrow.array.Array()
-        assert len(repr(arr)) > 0
 
-    def test_getitem_NA(self):
-        arr = pyarrow.from_pylist([1, None, 2])
-        assert arr[1] is pyarrow.NA
+def test_getitem_NA():
+    arr = pyarrow.from_pylist([1, None, 2])
+    assert arr[1] is pyarrow.NA
 
-    def test_list_format(self):
-        arr = pyarrow.from_pylist([[1], None, [2, 3, None]])
-        result = fmt.array_format(arr)
-        expected = """\
+
+def test_list_format():
+    arr = pyarrow.from_pylist([[1], None, [2, 3, None]])
+    result = fmt.array_format(arr)
+    expected = """\
 [
   [1],
   NA,
@@ -41,23 +40,25 @@ class TestArrayAPI(unittest.TestCase):
    3,
    NA]
 ]"""
-        assert result == expected
+    assert result == expected
+
 
-    def test_string_format(self):
-        arr = pyarrow.from_pylist(['', None, 'foo'])
-        result = fmt.array_format(arr)
-        expected = """\
+def test_string_format():
+    arr = pyarrow.from_pylist(['', None, 'foo'])
+    result = fmt.array_format(arr)
+    expected = """\
 [
   '',
   NA,
   'foo'
 ]"""
-        assert result == expected
+    assert result == expected
+
 
-    def test_long_array_format(self):
-        arr = pyarrow.from_pylist(range(100))
-        result = fmt.array_format(arr, window=2)
-        expected = """\
+def test_long_array_format():
+    arr = pyarrow.from_pylist(range(100))
+    result = fmt.array_format(arr, window=2)
+    expected = """\
 [
   0,
   1,
@@ -65,4 +66,4 @@ class TestArrayAPI(unittest.TestCase):
   98,
   99
 ]"""
-        assert result == expected
+    assert result == expected

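The test_array.py change above converts the unittest.TestCase methods into plain module-level functions so py.test can collect them directly. A quick way to run just that module from a built source tree (the path is illustrative):

    import pytest

    # Run only the array formatting tests, verbosely
    pytest.main(['python/pyarrow/tests/test_array.py', '-v'])
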
http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/pyarrow/tests/test_io.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
new file mode 100644
index 0000000..328e923
--- /dev/null
+++ b/python/pyarrow/tests/test_io.py
@@ -0,0 +1,126 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from io import BytesIO
+from os.path import join as pjoin
+import os
+import random
+
+import pytest
+
+import pyarrow.io as io
+
+#----------------------------------------------------------------------
+# HDFS tests
+
+
+def hdfs_test_client():
+    host = os.environ.get('ARROW_HDFS_TEST_HOST', 'localhost')
+    user = os.environ['ARROW_HDFS_TEST_USER']
+    try:
+        port = int(os.environ.get('ARROW_HDFS_TEST_PORT', 20500))
+    except ValueError:
+        raise ValueError('Env variable ARROW_HDFS_TEST_PORT was not '
+                         'an integer')
+
+    return io.HdfsClient.connect(host, port, user)
+
+
+libhdfs = pytest.mark.skipif(not io.have_libhdfs(),
+                             reason='No libhdfs available on system')
+
+
+HDFS_TMP_PATH = '/tmp/pyarrow-test-{0}'.format(random.randint(0, 1000))
+
+@pytest.fixture(scope='session')
+def hdfs(request):
+    fixture = hdfs_test_client()
+    def teardown():
+        fixture.delete(HDFS_TMP_PATH, recursive=True)
+        fixture.close()
+    request.addfinalizer(teardown)
+    return fixture
+
+
+@libhdfs
+def test_hdfs_close():
+    client = hdfs_test_client()
+    assert client.is_open
+    client.close()
+    assert not client.is_open
+
+    with pytest.raises(Exception):
+        client.ls('/')
+
+
+@libhdfs
+def test_hdfs_mkdir(hdfs):
+    path = pjoin(HDFS_TMP_PATH, 'test-dir/test-dir')
+    parent_path = pjoin(HDFS_TMP_PATH, 'test-dir')
+
+    hdfs.mkdir(path)
+    assert hdfs.exists(path)
+
+    hdfs.delete(parent_path, recursive=True)
+    assert not hdfs.exists(path)
+
+
+@libhdfs
+def test_hdfs_ls(hdfs):
+    base_path = pjoin(HDFS_TMP_PATH, 'ls-test')
+    hdfs.mkdir(base_path)
+
+    dir_path = pjoin(base_path, 'a-dir')
+    f1_path = pjoin(base_path, 'a-file-1')
+
+    hdfs.mkdir(dir_path)
+
+    f = hdfs.open(f1_path, 'wb')
+    f.write('a' * 10)
+
+    contents = sorted(hdfs.ls(base_path, False))
+    assert contents == [dir_path, f1_path]
+
+
+@libhdfs
+def test_hdfs_download_upload(hdfs):
+    base_path = pjoin(HDFS_TMP_PATH, 'upload-test')
+
+    data = b'foobarbaz'
+    buf = BytesIO(data)
+    buf.seek(0)
+
+    hdfs.upload(base_path, buf)
+
+    out_buf = BytesIO()
+    hdfs.download(base_path, out_buf)
+    out_buf.seek(0)
+    assert out_buf.getvalue() == data
+
+
+@libhdfs
+def test_hdfs_file_context_manager(hdfs):
+    path = pjoin(HDFS_TMP_PATH, 'ctx-manager')
+
+    data = b'foo'
+    with hdfs.open(path, 'wb') as f:
+        f.write(data)
+
+    with hdfs.open(path, 'rb') as f:
+        assert f.size() == 3
+        result = f.read(10)
+        assert result == data

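The new HDFS tests are configured entirely through environment variables and skip themselves when libhdfs is unavailable. A sketch of enabling them against a local cluster; the host, port, and user are placeholders that must point at a real namenode:

    import os
    import pytest

    # Placeholder values; adjust for the cluster under test
    os.environ['ARROW_HDFS_TEST_HOST'] = 'localhost'
    os.environ['ARROW_HDFS_TEST_PORT'] = '20500'
    os.environ['ARROW_HDFS_TEST_USER'] = 'hadoop'

    # Tests are skipped automatically when io.have_libhdfs() is False
    pytest.main(['python/pyarrow/tests/test_io.py', '-v'])
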
http://git-wip-us.apache.org/repos/asf/arrow/blob/ef908302/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index 7edeb91..59410d7 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -214,7 +214,14 @@ class build_ext(_build_ext):
             return name + suffix
 
     def get_cmake_cython_names(self):
-        return ['array', 'config', 'error', 'parquet', 'scalar', 'schema', 'table']
+        return ['array',
+                'config',
+                'error',
+                'io',
+                'parquet',
+                'scalar',
+                'schema',
+                'table']
 
     def get_names(self):
         return self._found_names

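The setup.py change registers the new io module with the Cython build. After rebuilding the extension, a quick smoke check (a sketch, not part of the commit) is to confirm the module imports and that libhdfs can be located:

    import pyarrow.io as io

    # have_libhdfs() reports whether libhdfs could be loaded at runtime
    print(io.have_libhdfs())
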
