arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [2/2] arrow git commit: ARROW-1142: [C++] Port over compression toolchain and interfaces from parquet-cpp, use Arrow-style error handling
Date Fri, 23 Jun 2017 23:07:02 GMT
ARROW-1142: [C++] Port over compression toolchain and interfaces from parquet-cpp, use Arrow-style error handling

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #771 from wesm/import-parquet-compression and squashes the following commits:

b7609f8e [Wes McKinney] Boost toolchain tweaks
44e77a64 [Wes McKinney] Install compression.h
732e426e [Wes McKinney] Revert bash equality test
31b2705a [Wes McKinney] cpplint
2ef43de2 [Wes McKinney] Clean up RAT exclusions, use absolute paths
cbbaecf0 [Wes McKinney] Add some license headers, use Apache Kudu approach for managing a lot of RAT exclusions
198dee16 [Wes McKinney] Clean up build dependencies
da31c2cb [Wes McKinney] Port over compression toolchain and interfaces from parquet-cpp, adapt to Arrow-style error handling


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/98f7cac6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/98f7cac6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/98f7cac6

Branch: refs/heads/master
Commit: 98f7cac6e162d9775d615d07b9867c1ec0030f82
Parents: 1514016
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Fri Jun 23 19:06:55 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Fri Jun 23 19:06:55 2017 -0400

----------------------------------------------------------------------
 .readthedocs.yml                            |  17 +
 .travis.yml                                 |  17 +
 ci/travis_script_cpp.sh                     |   2 +-
 cpp/.clang-format                           |  28 +-
 cpp/.clang-tidy                             |  19 +-
 cpp/.clang-tidy-ignore                      |  16 +
 cpp/CMakeLists.txt                          | 505 ++----------------
 cpp/cmake_modules/FindBrotli.cmake          | 116 ++++
 cpp/cmake_modules/FindSnappy.cmake          |  94 ++++
 cpp/cmake_modules/FindZLIB.cmake            | 105 ++++
 cpp/cmake_modules/SnappyCMakeLists.txt      |  85 +++
 cpp/cmake_modules/SnappyConfig.h            |  36 ++
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 641 +++++++++++++++++++++++
 cpp/src/arrow/python/CMakeLists.txt         |   4 +-
 cpp/src/arrow/util/CMakeLists.txt           |   7 +-
 cpp/src/arrow/util/compression-test.cc      |  89 ++++
 cpp/src/arrow/util/compression.cc           | 327 ++++++++++++
 cpp/src/arrow/util/compression.h            | 109 ++++
 cpp/src/arrow/util/logging.h                |   7 +-
 cpp/src/plasma/malloc.cc                    |   2 +-
 cpp/src/plasma/test/client_tests.cc         |   2 +-
 dev/release/check-rat-report.py             |  59 +++
 dev/release/rat_exclude_files.txt           |  66 +++
 dev/release/run-rat.sh                      |  64 +--
 24 files changed, 1890 insertions(+), 527 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/.readthedocs.yml
----------------------------------------------------------------------
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 2e1fe3f..11a7d70 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,2 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 conda:
     file: python/doc/environment.yml

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index a32562f..315cbd2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 sudo: required
 dist: trusty
 addons:

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/ci/travis_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh
index d555cab..c368a1d 100755
--- a/ci/travis_script_cpp.sh
+++ b/ci/travis_script_cpp.sh
@@ -17,7 +17,7 @@ set -e
 : ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}
 
 # Check licenses according to Apache policy
-git archive HEAD -o arrow-src.tar.gz
+git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar.gz
 ./dev/release/run-rat.sh arrow-src.tar.gz
 
 pushd $CPP_BUILD_DIR

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/.clang-format
----------------------------------------------------------------------
diff --git a/cpp/.clang-format b/cpp/.clang-format
index 7d5b3cf..33f282a 100644
--- a/cpp/.clang-format
+++ b/cpp/.clang-format
@@ -1,34 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 ---
 Language:        Cpp
 # BasedOnStyle:  Google
 AccessModifierOffset: -1
-AlignAfterOpenBracket: false 
+AlignAfterOpenBracket: false
 AlignConsecutiveAssignments: false
 AlignEscapedNewlinesLeft: true
 AlignOperands:   true
 AlignTrailingComments: true
 AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: true 
+AllowShortBlocksOnASingleLine: true
 AllowShortCaseLabelsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: Inline
 AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: false 
+AllowShortLoopsOnASingleLine: false
 AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakBeforeMultilineStrings: true
 AlwaysBreakTemplateDeclarations: true
 BinPackArguments: true
-BinPackParameters: true 
+BinPackParameters: true
 BreakBeforeBinaryOperators: None
 BreakBeforeBraces: Attach
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: false
-ColumnLimit: 90 
+ColumnLimit: 90
 CommentPragmas:  '^ IWYU pragma:'
 ConstructorInitializerAllOnOneLineOrOnePerLine: true
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
-DerivePointerAlignment: false 
+DerivePointerAlignment: false
 DisableFormat:   false
 ExperimentalAutoDetectBinPacking: false
 ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/.clang-tidy
----------------------------------------------------------------------
diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
index deaa9bd..b6b5a81 100644
--- a/cpp/.clang-tidy
+++ b/cpp/.clang-tidy
@@ -1,8 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 ---
 Checks:          'clang-diagnostic-*,clang-analyzer-*,-clang-analyzer-alpha*,google-.*,modernize-.*,readablity-.*'
 HeaderFilterRegex: 'arrow/.*'
 AnalyzeTemporaryDtors: true
-CheckOptions:    
+CheckOptions:
   - key:             google-readability-braces-around-statements.ShortStatementLines
     value:           '1'
   - key:             google-readability-function-size.StatementThreshold
@@ -11,4 +27,3 @@ CheckOptions:
     value:           '10'
   - key:             google-readability-namespace-comments.SpacesBeforeComments
     value:           '2'
-

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/.clang-tidy-ignore
----------------------------------------------------------------------
diff --git a/cpp/.clang-tidy-ignore b/cpp/.clang-tidy-ignore
index 5ab4d20..3270b97 100644
--- a/cpp/.clang-tidy-ignore
+++ b/cpp/.clang-tidy-ignore
@@ -1,2 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 ipc-adapter-test.cc
 memory-pool-test.cc

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5ba56e5..49e1d97 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -136,6 +136,18 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(ARROW_PLASMA
     "Build the plasma object store along with Arrow"
     OFF)
+
+  option(ARROW_ZLIB_VENDORED
+    "Build our own zlib (some libz.a aren't configured for static linking)"
+    ON)
+  if (MSVC)
+    set(BROTLI_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING
+      "Brotli static lib suffix used on Windows with MSVC (default _static)")
+    set(SNAPPY_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING
+      "Snappy static lib suffix used on Windows with MSVC (default is empty string)")
+    set(ZLIB_MSVC_STATIC_LIB_SUFFIX "libstatic" CACHE STRING
+      "Zlib static lib suffix used on Windows with MSVC (default libstatic)")
+  endif()
 endif()
 
 if(ARROW_BUILD_TESTS)
@@ -166,9 +178,14 @@ if (ARROW_NO_DEPRECATED_API)
   add_definitions(-DARROW_NO_DEPRECATED_API)
 endif()
 
+############################################################
+# Dependencies
+############################################################
+
+include(ThirdpartyToolchain)
+
 # Add common flags
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COMMON_FLAGS}")
-set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARROW_CXXFLAGS}")
 
 message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
@@ -401,444 +418,6 @@ endfunction()
 enable_testing()
 
 ############################################################
-# Dependencies
-############################################################
-
-# ----------------------------------------------------------------------
-# Thirdparty toolchain
-
-set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
-set(GFLAGS_VERSION "2.1.2")
-set(GTEST_VERSION "1.8.0")
-set(GBENCHMARK_VERSION "1.1.0")
-set(FLATBUFFERS_VERSION "1.6.0")
-set(JEMALLOC_VERSION "4.4.0")
-
-if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
-  set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
-
-  if (NOT DEFINED ENV{BOOST_ROOT})
-    # Since we have to set this in the environment, we check whether
-    # $BOOST_ROOT is defined inside here
-    set(ENV{BOOST_ROOT} "$ENV{ARROW_BUILD_TOOLCHAIN}")
-  endif()
-endif()
-
-if (DEFINED ENV{FLATBUFFERS_HOME})
-  set(FLATBUFFERS_HOME "$ENV{FLATBUFFERS_HOME}")
-endif()
-
-if (DEFINED ENV{RAPIDJSON_HOME})
-  set(RAPIDJSON_HOME "$ENV{RAPIDJSON_HOME}")
-endif()
-
-if (DEFINED ENV{JEMALLOC_HOME})
-  set(JEMALLOC_HOME "$ENV{JEMALLOC_HOME}")
-endif()
-
-if (DEFINED ENV{GFLAGS_HOME})
-  set(GFLAGS_HOME "$ENV{GFLAGS_HOME}")
-endif()
-
-# ----------------------------------------------------------------------
-# Find pthreads
-
-if (NOT MSVC)
-  find_library(PTHREAD_LIBRARY pthread)
-  message(STATUS "Found pthread: ${PTHREAD_LIBRARY}")
-endif()
-
-# ----------------------------------------------------------------------
-# Add Boost dependencies (code adapted from Apache Kudu (incubating))
-
-set(Boost_DEBUG TRUE)
-set(Boost_USE_MULTITHREADED ON)
-set(Boost_ADDITIONAL_VERSIONS
-  "1.63.0" "1.63"
-  "1.62.0" "1.61"
-  "1.61.0" "1.62"
-  "1.60.0" "1.60")
-
-if (ARROW_BOOST_USE_SHARED)
-  # Find shared Boost libraries.
-  set(Boost_USE_STATIC_LIBS OFF)
-
-  if(MSVC)
-    # disable autolinking in boost
-    add_definitions(-DBOOST_ALL_NO_LIB)
-
-    # force all boost libraries to dynamic link
-    add_definitions(-DBOOST_ALL_DYN_LINK)
-  endif()
-
-  if (ARROW_BOOST_HEADER_ONLY)
-    find_package(Boost)
-  else()
-    find_package(Boost COMPONENTS system filesystem REQUIRED)
-    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
-      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
-      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
-    else()
-      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
-      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
-    endif()
-    set(BOOST_SYSTEM_LIBRARY boost_system_shared)
-    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
-  endif()
-else()
-  # Find static boost headers and libs
-  # TODO Differentiate here between release and debug builds
-  set(Boost_USE_STATIC_LIBS ON)
-  if (ARROW_BOOST_HEADER_ONLY)
-    find_package(Boost)
-  else()
-    find_package(Boost COMPONENTS system filesystem regex REQUIRED)
-    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
-      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
-      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
-    else()
-      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
-      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
-    endif()
-    set(BOOST_SYSTEM_LIBRARY boost_system_static)
-    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
-  endif()
-endif()
-
-message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS})
-message(STATUS "Boost libraries: " ${Boost_LIBRARIES})
-
-if (NOT ARROW_BOOST_HEADER_ONLY)
-  ADD_THIRDPARTY_LIB(boost_system
-      STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}"
-      SHARED_LIB "${BOOST_SHARED_SYSTEM_LIBRARY}")
-
-  ADD_THIRDPARTY_LIB(boost_filesystem
-      STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
-      SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
-
-  SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
-endif()
-
-include_directories(SYSTEM ${Boost_INCLUDE_DIR})
-
-if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
-  add_custom_target(unittest ctest -L unittest)
-
-  if("$ENV{GTEST_HOME}" STREQUAL "")
-    if(APPLE)
-      set(GTEST_CMAKE_CXX_FLAGS "-fPIC -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes")
-    elseif(NOT MSVC)
-      set(GTEST_CMAKE_CXX_FLAGS "-fPIC")
-    endif()
-    string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
-    set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}} ${GTEST_CMAKE_CXX_FLAGS}")
-
-    set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep")
-    set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include")
-    set(GTEST_STATIC_LIB
-      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(GTEST_MAIN_STATIC_LIB
-      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(GTEST_VENDORED 1)
-    set(GTEST_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-                         -DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX}
-                         -Dgtest_force_shared_crt=ON
-                         -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS})
-
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(googletest_ep
-        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
-        BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB}
-        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
-    else()
-      ExternalProject_Add(googletest_ep
-        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
-        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
-    endif()
-  else()
-    find_package(GTest REQUIRED)
-    set(GTEST_VENDORED 0)
-  endif()
-
-  message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}")
-  message(STATUS "GTest static library: ${GTEST_STATIC_LIB}")
-  include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(gtest
-    STATIC_LIB ${GTEST_STATIC_LIB})
-  ADD_THIRDPARTY_LIB(gtest_main
-    STATIC_LIB ${GTEST_MAIN_STATIC_LIB})
-
-  if(GTEST_VENDORED)
-    add_dependencies(gtest googletest_ep)
-    add_dependencies(gtest_main googletest_ep)
-  endif()
-
-  # gflags (formerly Googleflags) command line parsing
-  if("${GFLAGS_HOME}" STREQUAL "")
-    set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS})
-
-    set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep")
-    set(GFLAGS_HOME "${GFLAGS_PREFIX}")
-    set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include")
-    if(MSVC)
-      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/gflags_static.lib")
-    else()
-      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a")
-    endif()
-    set(GFLAGS_VENDORED 1)
-    set(GFLAGS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-                          -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX}
-                          -DBUILD_SHARED_LIBS=OFF
-                          -DBUILD_STATIC_LIBS=ON
-                          -DBUILD_PACKAGING=OFF
-                          -DBUILD_TESTING=OFF
-                          -BUILD_CONFIG_TESTS=OFF
-                          -DINSTALL_HEADERS=ON
-                          -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS})
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(gflags_ep
-        GIT_REPOSITORY https://github.com/gflags/gflags.git
-        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
-        BUILD_IN_SOURCE 1
-        BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}"
-        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
-    else()
-      ExternalProject_Add(gflags_ep
-        GIT_REPOSITORY https://github.com/gflags/gflags.git
-        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
-        BUILD_IN_SOURCE 1
-        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
-    endif()
-  else()
-    set(GFLAGS_VENDORED 0)
-    find_package(GFlags REQUIRED)
-  endif()
-
-  message(STATUS "GFlags include dir: ${GFLAGS_INCLUDE_DIR}")
-  message(STATUS "GFlags static library: ${GFLAGS_STATIC_LIB}")
-  include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(gflags
-    STATIC_LIB ${GFLAGS_STATIC_LIB})
-  if(MSVC)
-    set_target_properties(gflags
-      PROPERTIES
-      IMPORTED_LINK_INTERFACE_LIBRARIES "shlwapi.lib")
-  endif()
-
-  if(GFLAGS_VENDORED)
-    add_dependencies(gflags gflags_ep)
-  endif()
-endif()
-
-if(ARROW_BUILD_BENCHMARKS)
-  add_custom_target(runbenchmark ctest -L benchmark)
-
-  if("$ENV{GBENCHMARK_HOME}" STREQUAL "")
-    if(APPLE)
-      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC -std=c++11 -stdlib=libc++")
-    elseif(NOT MSVC)
-      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC --std=c++11")
-    endif()
-
-    set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install")
-    set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include")
-    set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(GBENCHMARK_VENDORED 1)
-    set(GBENCHMARK_CMAKE_ARGS
-          "-DCMAKE_BUILD_TYPE=Release"
-          "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
-          "-DBENCHMARK_ENABLE_TESTING=OFF"
-          "-DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}")
-    if (APPLE)
-      set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
-    endif()
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(gbenchmark_ep
-        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
-        BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}"
-        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
-    else()
-      ExternalProject_Add(gbenchmark_ep
-        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
-        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
-    endif()
-  else()
-    find_package(GBenchmark REQUIRED)
-    set(GBENCHMARK_VENDORED 0)
-  endif()
-
-  message(STATUS "GBenchmark include dir: ${GBENCHMARK_INCLUDE_DIR}")
-  message(STATUS "GBenchmark static library: ${GBENCHMARK_STATIC_LIB}")
-  include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(benchmark
-    STATIC_LIB ${GBENCHMARK_STATIC_LIB})
-
-  if(GBENCHMARK_VENDORED)
-    add_dependencies(benchmark gbenchmark_ep)
-  endif()
-endif()
-
-if (ARROW_IPC)
-  # RapidJSON, header only dependency
-  if("${RAPIDJSON_HOME}" STREQUAL "")
-    ExternalProject_Add(rapidjson_ep
-      PREFIX "${CMAKE_BINARY_DIR}"
-      URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz"
-      URL_MD5 "badd12c511e081fec6c89c43a7027bce"
-      CONFIGURE_COMMAND ""
-      BUILD_COMMAND ""
-      BUILD_IN_SOURCE 1
-      INSTALL_COMMAND "")
-
-    ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
-    set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
-    set(RAPIDJSON_VENDORED 1)
-  else()
-    set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include")
-    set(RAPIDJSON_VENDORED 0)
-  endif()
-  message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
-  include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
-
-  ## Flatbuffers
-  if("${FLATBUFFERS_HOME}" STREQUAL "")
-    set(FLATBUFFERS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install")
-    ExternalProject_Add(flatbuffers_ep
-      URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz"
-      CMAKE_ARGS
-      "-DCMAKE_CXX_FLAGS=-fPIC"
-      "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}"
-      "-DFLATBUFFERS_BUILD_TESTS=OFF")
-
-    set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include")
-    set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc")
-    set(FLATBUFFERS_VENDORED 1)
-  else()
-    find_package(Flatbuffers REQUIRED)
-    set(FLATBUFFERS_VENDORED 0)
-  endif()
-
-  message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
-  message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")
-  include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
-endif()
-#----------------------------------------------------------------------
-
-if (MSVC)
-  # jemalloc is not supported on Windows
-  set(ARROW_JEMALLOC off)
-endif()
-
-if (ARROW_JEMALLOC)
-  find_package(jemalloc)
-
-  if(NOT JEMALLOC_FOUND)
-    set(ARROW_JEMALLOC_USE_SHARED OFF)
-    set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/")
-    set(JEMALLOC_HOME "${JEMALLOC_PREFIX}")
-    set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include")
-    set(JEMALLOC_SHARED_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}")
-    set(JEMALLOC_STATIC_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}")
-    set(JEMALLOC_VENDORED 1)
-    if (CMAKE_VERSION VERSION_GREATER "3.2")
-      # BUILD_BYPRODUCTS is a 3.2+ feature
-      ExternalProject_Add(jemalloc_ep
-        URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
-        CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
-        BUILD_IN_SOURCE 1
-        BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
-        BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" "${JEMALLOC_SHARED_LIB}"
-        INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install)
-    else()
-      ExternalProject_Add(jemalloc_ep
-        URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
-        CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
-        BUILD_IN_SOURCE 1
-        BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
-        INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install)
-    endif()
-  else()
-    set(JEMALLOC_VENDORED 0)
-  endif()
-
-  include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(jemalloc
-    STATIC_LIB ${JEMALLOC_STATIC_LIB}
-    SHARED_LIB ${JEMALLOC_SHARED_LIB}
-    DEPS ${PTHREAD_LIBRARY})
-endif()
-
-## Google PerfTools
-##
-## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
-## near definition of ARROW_USING_GOLD).
-# find_package(GPerf REQUIRED)
-# if (NOT "${ARROW_USE_ASAN}" AND
-#     NOT "${ARROW_USE_TSAN}" AND
-#     NOT ("${ARROW_USING_GOLD}" AND "${ARROW_LINK}" STREQUAL "d"))
-#   ADD_THIRDPARTY_LIB(tcmalloc
-#     STATIC_LIB "${TCMALLOC_STATIC_LIB}"
-#     SHARED_LIB "${TCMALLOC_SHARED_LIB}")
-#   ADD_THIRDPARTY_LIB(profiler
-#     STATIC_LIB "${PROFILER_STATIC_LIB}"
-#     SHARED_LIB "${PROFILER_SHARED_LIB}")
-#   list(APPEND ARROW_BASE_LIBS tcmalloc profiler)
-#   add_definitions("-DTCMALLOC_ENABLED")
-#   set(ARROW_TCMALLOC_AVAILABLE 1)
-# endif()
-
-########################################################################
-# HDFS thirdparty setup
-
-if (DEFINED ENV{HADOOP_HOME})
-  set(HADOOP_HOME $ENV{HADOOP_HOME})
-  if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h")
-    message(STATUS "Did not find hdfs.h in expected location, using vendored one")
-    set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
-  endif()
-else()
-  set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
-endif()
-
-set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h")
-if (NOT EXISTS ${HDFS_H_PATH})
-  message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}")
-endif()
-message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH})
-
-include_directories(SYSTEM "${HADOOP_HOME}/include")
-
-############################################################
-# Linker setup
-############################################################
-set(ARROW_MIN_TEST_LIBS
-  ${ARROW_STATIC_LINK_LIBS}
-  arrow_static
-  gtest
-  gtest_main
-  ${ARROW_BASE_LIBS})
-
-if(NOT MSVC)
-  set(ARROW_MIN_TEST_LIBS
-    ${ARROW_MIN_TEST_LIBS}
-    ${CMAKE_DL_LIBS})
-endif()
-
-set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
-
-set(ARROW_BENCHMARK_LINK_LIBS
-  arrow_static
-  arrow_benchmark_main
-  ${ARROW_BASE_LIBS})
-
-############################################################
 # "make ctags" target
 ############################################################
 if (UNIX)
@@ -936,16 +515,41 @@ if (${CLANG_TIDY_FOUND})
 
 endif()
 
-
-
 ############################################################
-# Subdirectories
+# Linker and Dependencies
 ############################################################
 
-set(ARROW_LINK_LIBS
-    )
+set(ARROW_STATIC_LINK_LIBS
+  brotli_dec
+  brotli_enc
+  brotli_common
+  snappy
+  zlib)
+
+set(ARROW_DEPENDENCIES
+  ${ARROW_STATIC_LINK_LIBS})
 
-set(ARROW_STATIC_LINK_LIBS)
+set(ARROW_MIN_TEST_LIBS
+  arrow_static
+  ${ARROW_STATIC_LINK_LIBS}
+  gtest
+  gtest_main)
+
+if(NOT MSVC)
+  set(ARROW_MIN_TEST_LIBS
+    ${ARROW_MIN_TEST_LIBS}
+    ${CMAKE_DL_LIBS})
+endif()
+
+set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
+
+set(ARROW_BENCHMARK_LINK_LIBS
+  arrow_static
+  arrow_benchmark_main
+  ${ARROW_STATIC_LINK_LIBS})
+
+set(ARROW_LINK_LIBS
+  ${ARROW_STATIC_LINK_LIBS})
 
 set(ARROW_SHARED_PRIVATE_LINK_LIBS
   ${BOOST_SYSTEM_LIBRARY}
@@ -1009,13 +613,9 @@ elseif (NOT MSVC)
     ${PTHREAD_LIBRARY})
 endif()
 
-if(RAPIDJSON_VENDORED)
-  set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} rapidjson_ep)
-endif()
-
-if(FLATBUFFERS_VENDORED)
-  set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} flatbuffers_ep)
-endif()
+############################################################
+# Subdirectories
+############################################################
 
 if(NOT WIN32 AND ARROW_PLASMA)
   add_subdirectory(src/plasma)
@@ -1048,6 +648,7 @@ set(ARROW_SRCS
   src/arrow/io/memory.cc
 
   src/arrow/util/bit-util.cc
+  src/arrow/util/compression.cc
   src/arrow/util/decimal.cc
   src/arrow/util/key_value_metadata.cc
 )

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/FindBrotli.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindBrotli.cmake b/cpp/cmake_modules/FindBrotli.cmake
new file mode 100644
index 0000000..f2e714c
--- /dev/null
+++ b/cpp/cmake_modules/FindBrotli.cmake
@@ -0,0 +1,116 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find Brotli headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(Brotli)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Brotli_HOME - When set, this path is inspected instead of standard library
+#                locations as the root of the Brotli installation.
+#                The environment variable BROTLI_HOME overrides this variable.
+#
+# This module defines
+#  BROTLI_INCLUDE_DIR, directory containing headers
+#  BROTLI_LIBS, directory containing brotli libraries
+#  BROTLI_STATIC_LIB, paths to the static libbrotlienc, libbrotlidec and libbrotlicommon libraries
+#  BROTLI_SHARED_LIB, paths to the corresponding brotli shared libraries
+#  BROTLI_FOUND, whether brotli has been found
+
+if( NOT "${BROTLI_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${BROTLI_HOME}" _native_path )
+    list( APPEND _brotli_roots ${_native_path} )
+elseif ( Brotli_HOME )
+    list( APPEND _brotli_roots ${Brotli_HOME} )
+endif()
+
+find_path( BROTLI_INCLUDE_DIR NAMES brotli/decode.h
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "include" )
+
+find_library( BROTLI_LIBRARY_ENC NAMES libbrotlienc.a brotlienc
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" )
+
+find_library( BROTLI_LIBRARY_DEC NAMES libbrotlidec.a brotlidec
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" )
+
+find_library( BROTLI_LIBRARY_COMMON NAMES libbrotlicommon.a brotlicommon
+  PATHS ${_brotli_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" )
+
+set(BROTLI_LIBRARIES ${BROTLI_LIBRARY_ENC} ${BROTLI_LIBRARY_DEC}
+    ${BROTLI_LIBRARY_COMMON})
+
+if (BROTLI_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR BROTLI_LIBRARIES))
+  set(BROTLI_FOUND TRUE)
+  get_filename_component( BROTLI_LIBS ${BROTLI_LIBRARY_ENC} PATH )
+  set(BROTLI_LIB_NAME brotli)
+  if (MSVC AND NOT BROTLI_MSVC_STATIC_LIB_SUFFIX)
+    set(BROTLI_MSVC_STATIC_LIB_SUFFIX _static)
+  endif()
+  set(BROTLI_STATIC_LIB
+      ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_STATIC_LIBRARY_ENC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_STATIC_LIBRARY_DEC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_STATIC_LIBRARY_COMMON ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(BROTLI_SHARED_LIB
+      ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${CMAKE_SHARED_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${CMAKE_SHARED_LIBRARY_SUFFIX}
+      ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(BROTLI_FOUND FALSE)
+endif ()
+
+if (BROTLI_FOUND)  # report the outcome of the search unless find_package was called QUIET
+  if (NOT Brotli_FIND_QUIETLY)
+    if (PARQUET_MINIMAL_DEPENDENCY)
+      message(STATUS "Found the Brotli headers: ${BROTLI_INCLUDE_DIR}")
+    else ()
+      message(STATUS "Found the Brotli library: ${BROTLI_LIBRARIES}")
+    endif ()
+  endif ()
+else ()
+  if (NOT Brotli_FIND_QUIETLY)
+    set(BROTLI_ERR_MSG "Could not find the Brotli library. Looked in")  # location is appended below
+    if ( _brotli_roots )
+      set(BROTLI_ERR_MSG "${BROTLI_ERR_MSG} ${_brotli_roots}.")
+    else ()
+      set(BROTLI_ERR_MSG "${BROTLI_ERR_MSG} system search paths.")
+    endif ()
+    if (Brotli_FIND_REQUIRED)  # REQUIRED turns a miss into a hard configure error
+      message(FATAL_ERROR "${BROTLI_ERR_MSG}")
+    else (Brotli_FIND_REQUIRED)
+      message(STATUS "${BROTLI_ERR_MSG}")
+    endif (Brotli_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  BROTLI_INCLUDE_DIR
+  BROTLI_LIBS
+  BROTLI_LIBRARIES
+  BROTLI_STATIC_LIB
+  BROTLI_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/FindSnappy.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindSnappy.cmake b/cpp/cmake_modules/FindSnappy.cmake
new file mode 100644
index 0000000..867963c
--- /dev/null
+++ b/cpp/cmake_modules/FindSnappy.cmake
@@ -0,0 +1,94 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find Snappy headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(Snappy)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Snappy_HOME - When set, this path is inspected instead of standard library
+#                locations as the root of the Snappy installation.
+#                The environment variable SNAPPY_HOME overrides this variable.
+#
+# This module defines
+#  SNAPPY_INCLUDE_DIR, directory containing headers
+#  SNAPPY_LIBS, directory containing snappy libraries
+#  SNAPPY_STATIC_LIB, path to libsnappy.a
+#  SNAPPY_SHARED_LIB, path to libsnappy's shared library
+#  SNAPPY_FOUND, whether snappy has been found
+
+if( NOT "${SNAPPY_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${SNAPPY_HOME}" _native_path )
+    list( APPEND _snappy_roots ${_native_path} )
+elseif ( Snappy_HOME )
+    list( APPEND _snappy_roots ${Snappy_HOME} )
+endif()
+
+message(STATUS "SNAPPY_HOME: ${SNAPPY_HOME}")
+find_path(SNAPPY_INCLUDE_DIR snappy.h HINTS
+  ${_snappy_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "include")
+
+find_library( SNAPPY_LIBRARIES NAMES snappy PATHS
+  ${_snappy_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib")
+
+if (SNAPPY_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR SNAPPY_LIBRARIES))
+  set(SNAPPY_FOUND TRUE)
+  get_filename_component( SNAPPY_LIBS ${SNAPPY_LIBRARIES} PATH )
+  set(SNAPPY_HEADER_NAME snappy.h)
+  set(SNAPPY_HEADER ${SNAPPY_INCLUDE_DIR}/${SNAPPY_HEADER_NAME})
+  set(SNAPPY_LIB_NAME snappy)
+  set(SNAPPY_STATIC_LIB ${SNAPPY_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(SNAPPY_SHARED_LIB ${SNAPPY_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(SNAPPY_FOUND FALSE)
+endif ()
+
+if (SNAPPY_FOUND)  # report the outcome of the search unless find_package was called QUIET
+  if (NOT Snappy_FIND_QUIETLY)
+    if (PARQUET_MINIMAL_DEPENDENCY)
+      message(STATUS "Found the Snappy header: ${SNAPPY_HEADER}")
+    else ()
+      message(STATUS "Found the Snappy library: ${SNAPPY_LIBRARIES}")
+    endif ()
+  endif ()
+else ()
+  if (NOT Snappy_FIND_QUIETLY)
+    set(SNAPPY_ERR_MSG "Could not find the Snappy library. Looked in")  # location is appended below
+    if ( _snappy_roots )
+      set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} ${_snappy_roots}.")
+    else ()
+      set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} system search paths.")
+    endif ()
+    if (Snappy_FIND_REQUIRED)  # REQUIRED turns a miss into a hard configure error
+      message(FATAL_ERROR "${SNAPPY_ERR_MSG}")
+    else (Snappy_FIND_REQUIRED)
+      message(STATUS "${SNAPPY_ERR_MSG}")
+    endif (Snappy_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  SNAPPY_INCLUDE_DIR
+  SNAPPY_LIBS
+  SNAPPY_LIBRARIES
+  SNAPPY_STATIC_LIB
+  SNAPPY_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/FindZLIB.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindZLIB.cmake b/cpp/cmake_modules/FindZLIB.cmake
new file mode 100644
index 0000000..78b84f2
--- /dev/null
+++ b/cpp/cmake_modules/FindZLIB.cmake
@@ -0,0 +1,105 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find ZLIB headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(ZLIB)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  ZLIB_HOME - When set, this path is inspected instead of standard library
+#             locations as the root of the ZLIB installation.
+#             The environment variable ZLIB_HOME overrides this variable.
+#
+# - Find ZLIB (zlib.h, libz.a, libz.so, and libz.so.1)
+# This module defines
+#  ZLIB_INCLUDE_DIR, directory containing headers
+#  ZLIB_LIBS, directory containing zlib libraries
+#  ZLIB_STATIC_LIB, path to libz.a
+#  ZLIB_SHARED_LIB, path to libz's shared library
+#  ZLIB_FOUND, whether zlib has been found
+
+if( NOT "${ZLIB_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${ZLIB_HOME}" _native_path )
+    list( APPEND _zlib_roots ${_native_path} )
+elseif ( ZLIB_HOME )
+    list( APPEND _zlib_roots ${ZLIB_HOME} )
+endif()
+
+# Try the parameterized roots, if they exist
+if ( _zlib_roots )
+    find_path( ZLIB_INCLUDE_DIR NAMES zlib.h
+        PATHS ${_zlib_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "include" )
+    find_library( ZLIB_LIBRARIES NAMES libz.a zlib
+        PATHS ${_zlib_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "lib" )
+else ()
+    find_path( ZLIB_INCLUDE_DIR NAMES zlib.h )
+    # Look for the static library (libz.a) first, then a library named zlib
+    find_library( ZLIB_LIBRARIES NAMES libz.a zlib )
+endif ()
+
+
+if (ZLIB_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR ZLIB_LIBRARIES))
+  set(ZLIB_FOUND TRUE)
+  get_filename_component( ZLIB_LIBS ${ZLIB_LIBRARIES} PATH )
+  set(ZLIB_HEADER_NAME zlib.h)
+  set(ZLIB_HEADER ${ZLIB_INCLUDE_DIR}/${ZLIB_HEADER_NAME})
+  set(ZLIB_LIB_NAME z)
+  if (MSVC)
+    if (NOT ZLIB_MSVC_STATIC_LIB_SUFFIX)
+      set(ZLIB_MSVC_STATIC_LIB_SUFFIX libstatic)
+    endif()
+    set(ZLIB_MSVC_SHARED_LIB_SUFFIX lib)
+  endif()
+  set(ZLIB_STATIC_LIB ${ZLIB_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${ZLIB_LIB_NAME}${ZLIB_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(ZLIB_SHARED_LIB ${ZLIB_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${ZLIB_LIB_NAME}${ZLIB_MSVC_SHARED_LIB_SUFFIX}${CMAKE_SHARED_LIBRARY_SUFFIX})
+else ()
+  set(ZLIB_FOUND FALSE)
+endif ()
+
+if (ZLIB_FOUND)  # report the outcome of the search unless find_package was called QUIET
+  if (NOT ZLIB_FIND_QUIETLY)
+    if (PARQUET_MINIMAL_DEPENDENCY)
+      message(STATUS "Found the ZLIB header: ${ZLIB_HEADER}")
+    else()
+      message(STATUS "Found the ZLIB library: ${ZLIB_LIBRARIES}")
+    endif ()
+  endif ()
+else ()
+  if (NOT ZLIB_FIND_QUIETLY)
+    set(ZLIB_ERR_MSG "Could not find the ZLIB library. Looked in")  # location is appended below
+    if ( _zlib_roots )
+      set(ZLIB_ERR_MSG "${ZLIB_ERR_MSG} ${_zlib_roots}.")
+    else ()
+      set(ZLIB_ERR_MSG "${ZLIB_ERR_MSG} system search paths.")
+    endif ()
+    if (ZLIB_FIND_REQUIRED)  # REQUIRED turns a miss into a hard configure error
+      message(FATAL_ERROR "${ZLIB_ERR_MSG}")
+    else (ZLIB_FIND_REQUIRED)
+      message(STATUS "${ZLIB_ERR_MSG}")
+    endif (ZLIB_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  ZLIB_INCLUDE_DIR
+  ZLIB_LIBS
+  ZLIB_LIBRARIES
+  ZLIB_STATIC_LIB
+  ZLIB_SHARED_LIB
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/SnappyCMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/SnappyCMakeLists.txt b/cpp/cmake_modules/SnappyCMakeLists.txt
new file mode 100644
index 0000000..9d0a166
--- /dev/null
+++ b/cpp/cmake_modules/SnappyCMakeLists.txt
@@ -0,0 +1,85 @@
+# Copyright 2008 Google Inc. All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
+PROJECT(snappy)
+
+INCLUDE(CheckIncludeFiles)
+INCLUDE(CMakePackageConfigHelpers)
+
+CHECK_INCLUDE_FILES("stdint.h" HAVE_STDINT_H)
+CHECK_INCLUDE_FILES("stddef.h" HAVE_STDDEF_H)
+CHECK_INCLUDE_FILES("sys/uio.h" HAVE_SYS_UIO_H)
+
+if (NOT HAVE_SYS_UIO_H)
+  set(HAVE_SYS_UIO_H 0)
+endif()
+
+if (NOT HAVE_STDINT_H)
+  set(HAVE_STDINT_H 0)
+endif()
+
+if (NOT HAVE_STDDEF_H)
+  set(HAVE_STDDEF_H 0)
+endif()
+
+set(ac_cv_have_stdint_h ${HAVE_STDINT_H})
+set(ac_cv_have_stddef_h ${HAVE_STDDEF_H})
+set(ac_cv_have_sys_uio_h ${HAVE_SYS_UIO_H})
+CONFIGURE_FILE(${snappy_SOURCE_DIR}/snappy-stubs-public.h.in
+               snappy-stubs-public.h)
+
+if (WIN32)
+  ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS)
+endif()
+
+set(SNAPPY_SRCS snappy.cc
+  snappy-c.cc
+  snappy-stubs-internal.cc
+  snappy-sinksource.cc
+  snappy.h
+  snappy-c.h
+  snappy-sinksource.h
+  snappy-stubs-public.h)
+
+add_library(snappy SHARED ${SNAPPY_SRCS})
+add_library(snappystatic STATIC ${SNAPPY_SRCS})
+
+TARGET_COMPILE_DEFINITIONS(snappy PRIVATE -DHAVE_CONFIG_H)
+TARGET_COMPILE_DEFINITIONS(snappystatic PRIVATE -DHAVE_CONFIG_H)
+
+install(FILES snappy.h
+  snappy-c.h
+  snappy-sinksource.h
+  ${snappy_BINARY_DIR}/snappy-stubs-public.h
+  DESTINATION include)
+
+install(TARGETS snappy snappystatic
+  RUNTIME DESTINATION bin
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/SnappyConfig.h
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/SnappyConfig.h b/cpp/cmake_modules/SnappyConfig.h
new file mode 100644
index 0000000..74eb776
--- /dev/null
+++ b/cpp/cmake_modules/SnappyConfig.h
@@ -0,0 +1,36 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef SNAPPY_CONFIG_H
+#define SNAPPY_CONFIG_H 1
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1900)
+typedef __int64 ssize_t;
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
new file mode 100644
index 0000000..f6a9bb4
--- /dev/null
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -0,0 +1,641 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# ----------------------------------------------------------------------
+# Thirdparty toolchain
+
+set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
+set(GFLAGS_VERSION "2.1.2")
+set(GTEST_VERSION "1.8.0")
+set(GBENCHMARK_VERSION "1.1.0")
+set(FLATBUFFERS_VERSION "1.6.0")
+set(JEMALLOC_VERSION "4.4.0")
+set(SNAPPY_VERSION "1.1.3")
+set(BROTLI_VERSION "v0.6.0")
+
+string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
+
+set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}")
+set(EP_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}")
+
+if (NOT MSVC)
+  # Set -fPIC on all external projects
+  set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC")
+  set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC")
+endif()
+
+if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
+  set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(SNAPPY_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(ZLIB_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  set(BROTLI_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+
+  if (NOT DEFINED ENV{BOOST_ROOT})
+    # Since we have to set this in the environment, we check whether
+    # $BOOST_ROOT is defined inside here
+    set(ENV{BOOST_ROOT} "$ENV{ARROW_BUILD_TOOLCHAIN}")
+  endif()
+endif()
+
+if (DEFINED ENV{FLATBUFFERS_HOME})
+  set(FLATBUFFERS_HOME "$ENV{FLATBUFFERS_HOME}")
+endif()
+
+if (DEFINED ENV{RAPIDJSON_HOME})
+  set(RAPIDJSON_HOME "$ENV{RAPIDJSON_HOME}")
+endif()
+
+if (DEFINED ENV{JEMALLOC_HOME})
+  set(JEMALLOC_HOME "$ENV{JEMALLOC_HOME}")
+endif()
+
+if (DEFINED ENV{GFLAGS_HOME})
+  set(GFLAGS_HOME "$ENV{GFLAGS_HOME}")
+endif()
+
+if (DEFINED ENV{SNAPPY_HOME})
+  set(SNAPPY_HOME "$ENV{SNAPPY_HOME}")
+endif()
+
+if (DEFINED ENV{ZLIB_HOME})
+  set(ZLIB_HOME "$ENV{ZLIB_HOME}")
+endif()
+
+if (DEFINED ENV{BROTLI_HOME})
+  set(BROTLI_HOME "$ENV{BROTLI_HOME}")
+endif()
+
+# ----------------------------------------------------------------------
+# Find pthreads
+
+if (NOT MSVC)
+  find_library(PTHREAD_LIBRARY pthread)
+  message(STATUS "Found pthread: ${PTHREAD_LIBRARY}")
+endif()
+
+# ----------------------------------------------------------------------
+# Add Boost dependencies (code adapted from Apache Kudu (incubating))
+
+set(Boost_DEBUG TRUE)
+set(Boost_USE_MULTITHREADED ON)
+set(Boost_ADDITIONAL_VERSIONS  # extra Boost versions for FindBoost to consider, newest first
+  "1.64.0" "1.64"
+  "1.63.0" "1.63"
+  "1.62.0" "1.62"
+  "1.61.0" "1.61"
+  "1.60.0" "1.60")
+
+if (ARROW_BOOST_USE_SHARED)
+  # Find shared Boost libraries.
+  set(Boost_USE_STATIC_LIBS OFF)
+
+  if(MSVC)
+    # disable autolinking in boost
+    add_definitions(-DBOOST_ALL_NO_LIB)
+
+    # force all boost libraries to dynamic link
+    add_definitions(-DBOOST_ALL_DYN_LINK)
+  endif()
+
+  if (ARROW_BOOST_HEADER_ONLY)
+    find_package(Boost)
+  else()
+    find_package(Boost COMPONENTS system filesystem REQUIRED)
+    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
+      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+    else()
+      set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
+      set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+    endif()
+    set(BOOST_SYSTEM_LIBRARY boost_system_shared)
+    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
+  endif()
+else()
+  # Find static boost headers and libs
+  # TODO Differentiate here between release and debug builds
+  set(Boost_USE_STATIC_LIBS ON)
+  if (ARROW_BOOST_HEADER_ONLY)
+    find_package(Boost)
+  else()
+    find_package(Boost COMPONENTS system filesystem REQUIRED)
+    if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
+      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+    else()
+      set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
+      set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+    endif()
+    set(BOOST_SYSTEM_LIBRARY boost_system_static)
+    set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
+  endif()
+endif()
+
+message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS})
+message(STATUS "Boost libraries: " ${Boost_LIBRARIES})
+
+if (NOT ARROW_BOOST_HEADER_ONLY)
+  ADD_THIRDPARTY_LIB(boost_system
+      STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}"
+      SHARED_LIB "${BOOST_SHARED_SYSTEM_LIBRARY}")
+
+  ADD_THIRDPARTY_LIB(boost_filesystem
+      STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
+      SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
+
+  SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
+endif()
+
+include_directories(SYSTEM ${Boost_INCLUDE_DIR})
+
+if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
+  add_custom_target(unittest ctest -L unittest)
+
+  if("$ENV{GTEST_HOME}" STREQUAL "")
+    if(APPLE)
+      set(GTEST_CMAKE_CXX_FLAGS "-fPIC -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes")
+    elseif(NOT MSVC)
+      set(GTEST_CMAKE_CXX_FLAGS "-fPIC")
+    endif()
+    string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
+    set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}} ${GTEST_CMAKE_CXX_FLAGS}")
+
+    set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep")
+    set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include")
+    set(GTEST_STATIC_LIB
+      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(GTEST_MAIN_STATIC_LIB
+      "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(GTEST_VENDORED 1)
+    set(GTEST_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                         -DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX}
+                         -Dgtest_force_shared_crt=ON
+                         -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS})
+
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      # BUILD_BYPRODUCTS is a 3.2+ feature
+      ExternalProject_Add(googletest_ep
+        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
+        BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB}
+        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
+    else()
+      ExternalProject_Add(googletest_ep
+        URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
+        CMAKE_ARGS ${GTEST_CMAKE_ARGS})
+    endif()
+  else()
+    find_package(GTest REQUIRED)
+    set(GTEST_VENDORED 0)
+  endif()
+
+  message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}")
+  message(STATUS "GTest static library: ${GTEST_STATIC_LIB}")
+  include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(gtest
+    STATIC_LIB ${GTEST_STATIC_LIB})
+  ADD_THIRDPARTY_LIB(gtest_main
+    STATIC_LIB ${GTEST_MAIN_STATIC_LIB})
+
+  if(GTEST_VENDORED)
+    add_dependencies(gtest googletest_ep)
+    add_dependencies(gtest_main googletest_ep)
+  endif()
+
+  # gflags (formerly Googleflags) command line parsing
+  if("${GFLAGS_HOME}" STREQUAL "")
+    set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS})
+
+    set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep")
+    set(GFLAGS_HOME "${GFLAGS_PREFIX}")
+    set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include")
+    if(MSVC)
+      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/gflags_static.lib")
+    else()
+      set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a")
+    endif()
+    set(GFLAGS_VENDORED 1)
+    set(GFLAGS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}  # arguments for the vendored gflags build
+                          -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX}
+                          -DBUILD_SHARED_LIBS=OFF
+                          -DBUILD_STATIC_LIBS=ON
+                          -DBUILD_PACKAGING=OFF
+                          -DBUILD_TESTING=OFF
+                          -DBUILD_CONFIG_TESTS=OFF
+                          -DINSTALL_HEADERS=ON
+                          -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS})
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      # BUILD_BYPRODUCTS is a 3.2+ feature
+      ExternalProject_Add(gflags_ep
+        GIT_REPOSITORY https://github.com/gflags/gflags.git
+        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
+        BUILD_IN_SOURCE 1
+        BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}"
+        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
+    else()
+      ExternalProject_Add(gflags_ep
+        GIT_REPOSITORY https://github.com/gflags/gflags.git
+        GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee
+        BUILD_IN_SOURCE 1
+        CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
+    endif()
+  else()
+    set(GFLAGS_VENDORED 0)
+    find_package(GFlags REQUIRED)
+  endif()
+
+  message(STATUS "GFlags include dir: ${GFLAGS_INCLUDE_DIR}")
+  message(STATUS "GFlags static library: ${GFLAGS_STATIC_LIB}")
+  include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(gflags
+    STATIC_LIB ${GFLAGS_STATIC_LIB})
+  if(MSVC)
+    set_target_properties(gflags
+      PROPERTIES
+      IMPORTED_LINK_INTERFACE_LIBRARIES "shlwapi.lib")
+  endif()
+
+  if(GFLAGS_VENDORED)
+    add_dependencies(gflags gflags_ep)
+  endif()
+endif()
+
+if(ARROW_BUILD_BENCHMARKS)
+  add_custom_target(runbenchmark ctest -L benchmark)
+
+  if("$ENV{GBENCHMARK_HOME}" STREQUAL "")
+    if(APPLE)
+      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC -std=c++11 -stdlib=libc++")
+    elseif(NOT MSVC)
+      set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC --std=c++11")
+    endif()
+
+    set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install")
+    set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include")
+    set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(GBENCHMARK_VENDORED 1)
+    set(GBENCHMARK_CMAKE_ARGS
+          "-DCMAKE_BUILD_TYPE=Release"
+          "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
+          "-DBENCHMARK_ENABLE_TESTING=OFF"
+          "-DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}")
+    if (APPLE)
+      set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
+    endif()
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      # BUILD_BYPRODUCTS is a 3.2+ feature
+      ExternalProject_Add(gbenchmark_ep
+        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
+        BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}"
+        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
+    else()
+      ExternalProject_Add(gbenchmark_ep
+        URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
+        CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
+    endif()
+  else()
+    find_package(GBenchmark REQUIRED)
+    set(GBENCHMARK_VENDORED 0)
+  endif()
+
+  message(STATUS "GBenchmark include dir: ${GBENCHMARK_INCLUDE_DIR}")
+  message(STATUS "GBenchmark static library: ${GBENCHMARK_STATIC_LIB}")
+  include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(benchmark
+    STATIC_LIB ${GBENCHMARK_STATIC_LIB})
+
+  if(GBENCHMARK_VENDORED)
+    add_dependencies(benchmark gbenchmark_ep)
+  endif()
+endif()
+
+
+if (ARROW_IPC)
+  # Dependencies that are only configured when ARROW_IPC is enabled:
+  # RapidJSON (headers) and Flatbuffers (headers plus the flatc compiler).
+
+  # RapidJSON, header only dependency
+  if("${RAPIDJSON_HOME}" STREQUAL "")
+    # No RAPIDJSON_HOME given: fetch the release tarball. The configure, build
+    # and install commands are all empty, so the project is only downloaded
+    # and unpacked; the headers are used straight from the source tree.
+    ExternalProject_Add(rapidjson_ep
+      PREFIX "${CMAKE_BINARY_DIR}"
+      URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz"
+      URL_MD5 "badd12c511e081fec6c89c43a7027bce"
+      CONFIGURE_COMMAND ""
+      BUILD_COMMAND ""
+      BUILD_IN_SOURCE 1
+      INSTALL_COMMAND "")
+
+    ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
+    set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
+    set(RAPIDJSON_VENDORED 1)
+  else()
+    # Use the caller-provided installation as-is
+    set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include")
+    set(RAPIDJSON_VENDORED 0)
+  endif()
+  message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
+  include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
+
+  ## Flatbuffers
+  if("${FLATBUFFERS_HOME}" STREQUAL "")
+    # No FLATBUFFERS_HOME given: build and install Flatbuffers into a private
+    # prefix under the build tree (position-independent, tests disabled)
+    set(FLATBUFFERS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install")
+    ExternalProject_Add(flatbuffers_ep
+      URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz"
+      CMAKE_ARGS
+      "-DCMAKE_CXX_FLAGS=-fPIC"
+      "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}"
+      "-DFLATBUFFERS_BUILD_TESTS=OFF")
+
+    set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include")
+    set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc")
+    set(FLATBUFFERS_VENDORED 1)
+  else()
+    # NOTE(review): presumably the Flatbuffers find module sets
+    # FLATBUFFERS_INCLUDE_DIR and FLATBUFFERS_COMPILER -- confirm in
+    # cmake_modules
+    find_package(Flatbuffers REQUIRED)
+    set(FLATBUFFERS_VENDORED 0)
+  endif()
+
+  # Record the vendored external projects in ARROW_DEPENDENCIES
+  if(RAPIDJSON_VENDORED)
+    set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} rapidjson_ep)
+  endif()
+
+  if(FLATBUFFERS_VENDORED)
+    set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} flatbuffers_ep)
+  endif()
+
+  message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
+  message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")
+  include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
+endif()
+#----------------------------------------------------------------------
+
+# jemalloc is not supported on Windows
+if (MSVC)
+  set(ARROW_JEMALLOC off)
+endif()
+
+# jemalloc: prefer an installation located by find_package, otherwise build a
+# vendored copy (jemalloc_ep) inside the build tree.
+if (ARROW_JEMALLOC)
+  find_package(jemalloc)
+
+  if(JEMALLOC_FOUND)
+    set(JEMALLOC_VENDORED 0)
+  else()
+    set(JEMALLOC_VENDORED 1)
+    set(ARROW_JEMALLOC_USE_SHARED OFF)
+    set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/")
+    set(JEMALLOC_HOME "${JEMALLOC_PREFIX}")
+    set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include")
+    set(JEMALLOC_SHARED_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    set(JEMALLOC_STATIC_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}")
+
+    # BUILD_BYPRODUCTS is a 3.2+ feature, so only pass it when it is understood;
+    # otherwise the variable is left empty and expands to nothing below
+    set(JEMALLOC_BUILD_BYPRODUCTS)
+    if (CMAKE_VERSION VERSION_GREATER "3.2")
+      set(JEMALLOC_BUILD_BYPRODUCTS
+        BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" "${JEMALLOC_SHARED_LIB}")
+    endif()
+
+    ExternalProject_Add(jemalloc_ep
+      URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
+      CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
+      BUILD_IN_SOURCE 1
+      BUILD_COMMAND ${CMAKE_MAKE_PROGRAM}
+      ${JEMALLOC_BUILD_BYPRODUCTS}
+      INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install)
+  endif()
+
+  include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(jemalloc
+    STATIC_LIB ${JEMALLOC_STATIC_LIB}
+    SHARED_LIB ${JEMALLOC_SHARED_LIB}
+    DEPS ${PTHREAD_LIBRARY})
+endif()
+
+## Google PerfTools
+##
+## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
+## near definition of ARROW_USING_GOLD).
+# find_package(GPerf REQUIRED)
+# if (NOT "${ARROW_USE_ASAN}" AND
+#     NOT "${ARROW_USE_TSAN}" AND
+#     NOT ("${ARROW_USING_GOLD}" AND "${ARROW_LINK}" STREQUAL "d"))
+#   ADD_THIRDPARTY_LIB(tcmalloc
+#     STATIC_LIB "${TCMALLOC_STATIC_LIB}"
+#     SHARED_LIB "${TCMALLOC_SHARED_LIB}")
+#   ADD_THIRDPARTY_LIB(profiler
+#     STATIC_LIB "${PROFILER_STATIC_LIB}"
+#     SHARED_LIB "${PROFILER_SHARED_LIB}")
+#   list(APPEND ARROW_BASE_LIBS tcmalloc profiler)
+#   add_definitions("-DTCMALLOC_ENABLED")
+#   set(ARROW_TCMALLOC_AVAILABLE 1)
+# endif()
+
+########################################################################
+# HDFS thirdparty setup
+
+# Pick the directory providing include/hdfs.h: $HADOOP_HOME from the
+# environment when the header is really there, otherwise the copy vendored
+# under THIRDPARTY_DIR.
+set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop")
+if (DEFINED ENV{HADOOP_HOME})
+  if (EXISTS "$ENV{HADOOP_HOME}/include/hdfs.h")
+    set(HADOOP_HOME $ENV{HADOOP_HOME})
+  else()
+    message(STATUS "Did not find hdfs.h in expected location, using vendored one")
+  endif()
+endif()
+
+# The header has to exist in whichever location was selected
+set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h")
+if (EXISTS ${HDFS_H_PATH})
+  message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH})
+else()
+  message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}")
+endif()
+
+include_directories(SYSTEM "${HADOOP_HOME}/include")
+
+# ----------------------------------------------------------------------
+# ZLIB
+
+# Look for an existing zlib unless the vendored build was requested explicitly
+if (NOT ARROW_ZLIB_VENDORED)
+  find_package(ZLIB)
+endif()
+
+if (NOT ZLIB_FOUND)
+  # Nothing found (or the search was skipped): build a static zlib as zlib_ep
+  # and point the ZLIB_* variables at its install prefix in the build tree
+  set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
+  set(ZLIB_HOME "${ZLIB_PREFIX}")
+  set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include")
+  # The static library file name differs by platform and, with MSVC, by
+  # build type
+  if (MSVC)
+    if (${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
+      set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib)
+    else()
+      set(ZLIB_STATIC_LIB_NAME zlibstatic.lib)
+    endif()
+  else()
+    set(ZLIB_STATIC_LIB_NAME libz.a)
+  endif()
+  set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}")
+  set(ZLIB_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                      -DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}
+                      -DCMAKE_C_FLAGS=${EP_C_FLAGS}
+                      -DBUILD_SHARED_LIBS=OFF)
+
+  # BUILD_BYPRODUCTS is only passed to newer CMake; on older versions the
+  # variable stays unset and expands to nothing in the call below
+  if (CMAKE_VERSION VERSION_GREATER "3.2")
+    set(ZLIB_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}")
+  endif()
+  ExternalProject_Add(zlib_ep
+    URL "http://zlib.net/fossils/zlib-1.2.8.tar.gz"
+    ${ZLIB_BUILD_BYPRODUCTS}
+    CMAKE_ARGS ${ZLIB_CMAKE_ARGS})
+  set(ZLIB_VENDORED 1)
+else()
+  set(ZLIB_VENDORED 0)
+endif()
+
+include_directories(SYSTEM ${ZLIB_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(zlib
+  STATIC_LIB ${ZLIB_STATIC_LIB})
+
+# The zlib target must wait for the external project when zlib is vendored
+if (ZLIB_VENDORED)
+  add_dependencies(zlib zlib_ep)
+endif()
+
+# ----------------------------------------------------------------------
+# Snappy
+
+## Snappy
+# Use an installed Snappy when find_package locates one; otherwise build a
+# vendored static library as snappy_ep (CMake-driven on MSVC, ./configure
+# elsewhere).
+find_package(Snappy)
+if (NOT SNAPPY_FOUND)
+  set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep/src/snappy_ep-install")
+  set(SNAPPY_HOME "${SNAPPY_PREFIX}")
+  set(SNAPPY_INCLUDE_DIR "${SNAPPY_PREFIX}/include")
+  if (MSVC)
+    set(SNAPPY_STATIC_LIB_NAME snappystatic)
+  else()
+    set(SNAPPY_STATIC_LIB_NAME snappy)
+  endif()
+  set(SNAPPY_STATIC_LIB "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(SNAPPY_SRC_URL "https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz")
+
+  # Release flags handed to ./configure (non-MSVC path only).
+  # This has to be STREQUAL: EQUAL is CMake's numeric comparison and never
+  # matches the string "RELEASE", which silently dropped these flags.
+  # The value carries no embedded shell quotes because the whole
+  # "CXXFLAGS=..." string reaches configure as one single argument.
+  if ("${UPPERCASE_BUILD_TYPE}" STREQUAL "RELEASE")
+    if (APPLE)
+      set(SNAPPY_CXXFLAGS "CXXFLAGS=-DNDEBUG -O1")
+    else()
+      set(SNAPPY_CXXFLAGS "CXXFLAGS=-DNDEBUG -O2")
+    endif()
+  endif()
+
+  # BUILD_BYPRODUCTS is only passed to newer CMake; otherwise the variable
+  # stays unset and expands to nothing in the calls below
+  if (CMAKE_VERSION VERSION_GREATER "3.2")
+    set(SNAPPY_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")
+  endif()
+
+  if (MSVC)
+    # EP_C_FLAGS (not "EX_C_FLAGS") is the external-project C flag variable,
+    # matching EP_CXX_FLAGS on the line above it
+    set(SNAPPY_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                          "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}"
+                          "-DCMAKE_C_FLAGS=${EP_C_FLAGS}"
+                          "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
+    # Drop our own CMakeLists.txt and config.h into the unpacked sources
+    # before the CMake configure step runs
+    set(SNAPPY_UPDATE_COMMAND ${CMAKE_COMMAND} -E copy
+                      ${CMAKE_SOURCE_DIR}/cmake_modules/SnappyCMakeLists.txt
+                      ./CMakeLists.txt &&
+                      ${CMAKE_COMMAND} -E copy
+                      ${CMAKE_SOURCE_DIR}/cmake_modules/SnappyConfig.h
+                      ./config.h)
+    ExternalProject_Add(snappy_ep
+      UPDATE_COMMAND ${SNAPPY_UPDATE_COMMAND}
+      BUILD_IN_SOURCE 1
+      BUILD_COMMAND ${MAKE}
+      INSTALL_DIR ${SNAPPY_PREFIX}
+      URL ${SNAPPY_SRC_URL}
+      CMAKE_ARGS ${SNAPPY_CMAKE_ARGS}
+      ${SNAPPY_BUILD_BYPRODUCTS})
+  else()
+    ExternalProject_Add(snappy_ep
+      CONFIGURE_COMMAND ./configure --with-pic "--prefix=${SNAPPY_PREFIX}" ${SNAPPY_CXXFLAGS}
+      BUILD_IN_SOURCE 1
+      BUILD_COMMAND ${MAKE}
+      INSTALL_DIR ${SNAPPY_PREFIX}
+      URL ${SNAPPY_SRC_URL}
+      ${SNAPPY_BUILD_BYPRODUCTS})
+  endif()
+  set(SNAPPY_VENDORED 1)
+else()
+  set(SNAPPY_VENDORED 0)
+endif()
+
+include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(snappy
+  STATIC_LIB ${SNAPPY_STATIC_LIB})
+
+# The snappy target must wait for the external project when it is vendored
+if (SNAPPY_VENDORED)
+  add_dependencies(snappy snappy_ep)
+endif()
+
+# ----------------------------------------------------------------------
+# Brotli
+
+# Use an installed Brotli when find_package locates one; otherwise build the
+# three static libraries (enc, dec, common) as brotli_ep.
+find_package(Brotli)
+if (NOT BROTLI_FOUND)
+  set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install")
+  set(BROTLI_HOME "${BROTLI_PREFIX}")
+  set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include")
+  if (MSVC)
+    set(BROTLI_LIB_DIR bin)
+  else()
+    set(BROTLI_LIB_DIR lib)
+  endif()
+  set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(BROTLI_STATIC_LIBRARY_DEC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(BROTLI_STATIC_LIBRARY_COMMON "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  # EP_C_FLAGS (not "EX_C_FLAGS") is the external-project C flag variable,
+  # matching EP_CXX_FLAGS on the line above it. Brotli is C code, so with the
+  # misspelled (empty) variable it was built without the intended flags.
+  set(BROTLI_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                        "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}"
+                        "-DCMAKE_C_FLAGS=${EP_C_FLAGS}"
+                        -DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}
+                        -DCMAKE_INSTALL_LIBDIR=lib/${CMAKE_LIBRARY_ARCHITECTURE}
+                        -DBUILD_SHARED_LIBS=OFF)
+
+  # BUILD_BYPRODUCTS is only passed to newer CMake; otherwise the variable
+  # stays unset and expands to nothing in the call below
+  if (CMAKE_VERSION VERSION_GREATER "3.2")
+    set(BROTLI_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" "${BROTLI_STATIC_LIBRARY_DEC}" "${BROTLI_STATIC_LIBRARY_COMMON}")
+  endif()
+
+  ExternalProject_Add(brotli_ep
+    URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz"
+    ${BROTLI_BUILD_BYPRODUCTS}
+    CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
+    STEP_TARGETS headers_copy)
+  if (MSVC)
+    ExternalProject_Get_Property(brotli_ep SOURCE_DIR)
+
+    # Extra step after "build": copy the public headers from the source tree
+    # into the install prefix's include directory
+    ExternalProject_Add_Step(brotli_ep headers_copy
+      COMMAND xcopy /E /I include ..\\..\\..\\brotli_ep\\src\\brotli_ep-install\\include /Y
+      DEPENDEES build
+      WORKING_DIRECTORY ${SOURCE_DIR})
+  endif()
+  set(BROTLI_VENDORED 1)
+else()
+  set(BROTLI_VENDORED 0)
+endif()
+
+include_directories(SYSTEM ${BROTLI_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(brotli_enc
+  STATIC_LIB ${BROTLI_STATIC_LIBRARY_ENC})
+ADD_THIRDPARTY_LIB(brotli_dec
+  STATIC_LIB ${BROTLI_STATIC_LIBRARY_DEC})
+ADD_THIRDPARTY_LIB(brotli_common
+  STATIC_LIB ${BROTLI_STATIC_LIBRARY_COMMON})
+
+# All three targets must wait for the external project when it is vendored
+if (BROTLI_VENDORED)
+  add_dependencies(brotli_enc brotli_ep)
+  add_dependencies(brotli_dec brotli_ep)
+  add_dependencies(brotli_common brotli_ep)
+endif()

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index 3085229..bc2a815 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -34,8 +34,8 @@ endif()
 
 set(ARROW_PYTHON_MIN_TEST_LIBS
   arrow_python_test_main
-  arrow_python_static
-  arrow_static)
+  arrow_python_shared
+  arrow_shared)
 
 set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS})
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index ac7e866..1abcce4 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -22,11 +22,13 @@
 # Headers: top level
 install(FILES
   bit-util.h
+  compression.h
+  key_value_metadata.h
   logging.h
   macros.h
   random.h
+  stl.h
   visibility.h
-  key_value_metadata.h
   DESTINATION include/arrow/util)
 
 #######################################
@@ -51,6 +53,7 @@ if (ARROW_BUILD_BENCHMARKS)
 endif()
 
 ADD_ARROW_TEST(bit-util-test)
-ADD_ARROW_TEST(stl-util-test)
+ADD_ARROW_TEST(compression-test)
 ADD_ARROW_TEST(decimal-test)
 ADD_ARROW_TEST(key-value-metadata-test)
+ADD_ARROW_TEST(stl-util-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/compression-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression-test.cc b/cpp/src/arrow/util/compression-test.cc
new file mode 100644
index 0000000..1a0e5d7
--- /dev/null
+++ b/cpp/src/arrow/util/compression-test.cc
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/test-common.h"
+#include "arrow/util/compression.h"
+
+using std::string;
+using std::vector;
+
+namespace arrow {
+
+// Round-trips `data` through two independent instances of codec type T:
+// compress with one and decompress with the other, then swap roles. Fails
+// the current test if any call reports an error, if the decompressed bytes
+// differ from `data`, or if the two instances compress to different sizes.
+template <typename T>
+void CheckCodecRoundtrip(const vector<uint8_t>& data) {
+  // create multiple compressors to try to break them
+  T c1;
+  T c2;
+
+  int max_compressed_len = static_cast<int>(c1.MaxCompressedLen(data.size(), &data[0]));
+  std::vector<uint8_t> compressed(max_compressed_len);
+  std::vector<uint8_t> decompressed(data.size());
+
+  // compress with c1
+  int64_t actual_size;
+  ASSERT_OK(c1.Compress(
+      data.size(), &data[0], max_compressed_len, &compressed[0], &actual_size));
+  compressed.resize(actual_size);
+
+  // decompress with c2
+  ASSERT_OK(c2.Decompress(
+      compressed.size(), &compressed[0], decompressed.size(), &decompressed[0]));
+
+  ASSERT_EQ(data, decompressed);
+
+  // compress with c2
+  // Grow the buffer back first: it was shrunk to actual_size above, but the
+  // call below advertises max_compressed_len bytes of room. Writing past
+  // size() is undefined even when the capacity happens to be retained.
+  compressed.resize(max_compressed_len);
+  int64_t actual_size2;
+  ASSERT_OK(c2.Compress(
+      data.size(), &data[0], max_compressed_len, &compressed[0], &actual_size2));
+  ASSERT_EQ(actual_size2, actual_size);
+  compressed.resize(actual_size2);
+
+  // decompress with c1
+  ASSERT_OK(c1.Decompress(
+      compressed.size(), &compressed[0], decompressed.size(), &decompressed[0]));
+
+  ASSERT_EQ(data, decompressed);
+}
+
+// Runs the roundtrip check for codec type T on two buffers of pseudo-random
+// bytes (10000 and 100000 bytes), both generated with the seed 1234.
+template <typename T>
+void CheckCodec() {
+  for (const int num_bytes : {10000, 100000}) {
+    vector<uint8_t> payload(num_bytes);
+    test::random_bytes(num_bytes, 1234, payload.data());
+    CheckCodecRoundtrip<T>(payload);
+  }
+}
+
+// One test case per codec class; each runs the same roundtrip checks through
+// CheckCodec.
+TEST(TestCompressors, Snappy) {
+  CheckCodec<SnappyCodec>();
+}
+
+TEST(TestCompressors, Brotli) {
+  CheckCodec<BrotliCodec>();
+}
+
+TEST(TestCompressors, GZip) {
+  CheckCodec<GZipCodec>();
+}
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/compression.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc
new file mode 100644
index 0000000..f82ae5c
--- /dev/null
+++ b/cpp/src/arrow/util/compression.cc
@@ -0,0 +1,327 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/compression.h"
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+#include <snappy.h>
+#include <zlib.h>
+
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+Codec::~Codec() {}
+
+// Factory for codecs.
+//
+// On success *result owns a new codec of the requested type. For
+// Compression::UNCOMPRESSED there is no codec, so *result is reset to null
+// rather than left holding whatever the caller passed in.
+//
+// Returns NotImplemented for LZO and Invalid for an unknown codec_type; in
+// both of those cases *result is left untouched.
+Status Codec::Create(Compression::type codec_type, std::unique_ptr<Codec>* result) {
+  switch (codec_type) {
+    case Compression::UNCOMPRESSED:
+      // No codec needed; make that observable instead of keeping stale state
+      result->reset();
+      break;
+    case Compression::SNAPPY:
+      result->reset(new SnappyCodec());
+      break;
+    case Compression::GZIP:
+      result->reset(new GZipCodec());
+      break;
+    case Compression::LZO:
+      return Status::NotImplemented("LZO codec not implemented");
+    case Compression::BROTLI:
+      result->reset(new BrotliCodec());
+      break;
+    default:
+      return Status::Invalid("Unrecognized codec");
+  }
+  return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// gzip implementation
+
+// These are magic numbers from zlib.h.  Not clear why they are not defined
+// there.
+
+// Maximum window size
+static constexpr int WINDOW_BITS = 15;
+
+// Output Gzip.
+static constexpr int GZIP_CODEC = 16;
+
+// Determine if this is libz or gzip from header.
+static constexpr int DETECT_CODEC = 32;
+
+// Private implementation of GZipCodec. It owns one zlib z_stream that is
+// switched between deflate (compress) and inflate (decompress) state on
+// demand; only one of the two is initialized at any time.
+class GZipCodec::GZipCodecImpl {
+ public:
+  explicit GZipCodecImpl(GZipCodec::Format format)
+      : format_(format),
+        compressor_initialized_(false),
+        decompressor_initialized_(false) {}
+
+  ~GZipCodecImpl() {
+    EndCompressor();
+    EndDecompressor();
+  }
+
+  // Puts the stream into deflate state, releasing any inflate state first.
+  // Returns IOError if zlib refuses to initialize.
+  Status InitCompressor() {
+    EndDecompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run specified format
+    int window_bits = WINDOW_BITS;
+    if (format_ == DEFLATE) {
+      window_bits = -window_bits;
+    } else if (format_ == GZIP) {
+      window_bits += GZIP_CODEC;
+    }
+    if (deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits, 9,
+            Z_DEFAULT_STRATEGY) != Z_OK) {
+      return ZlibError("zlib deflateInit failed: ");
+    }
+    compressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  // Releases deflate state, if any
+  void EndCompressor() {
+    if (compressor_initialized_) { (void)deflateEnd(&stream_); }
+    compressor_initialized_ = false;
+  }
+
+  // Puts the stream into inflate state, releasing any deflate state first.
+  // Returns IOError if zlib refuses to initialize.
+  Status InitDecompressor() {
+    EndCompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run either deflate or zlib/gzip format
+    int window_bits = format_ == DEFLATE ? -WINDOW_BITS : WINDOW_BITS | DETECT_CODEC;
+    if (inflateInit2(&stream_, window_bits) != Z_OK) {
+      return ZlibError("zlib inflateInit failed: ");
+    }
+    decompressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  // Releases inflate state, if any
+  void EndDecompressor() {
+    if (decompressor_initialized_) { (void)inflateEnd(&stream_); }
+    decompressor_initialized_ = false;
+  }
+
+  // Decompresses the whole of `input` into `output` in one shot.
+  // `output_length` is the capacity of `output`. Returns IOError when zlib
+  // cannot finish the stream with the buffers it was given.
+  Status Decompress(int64_t input_length, const uint8_t* input, int64_t output_length,
+      uint8_t* output) {
+    if (!decompressor_initialized_) { RETURN_NOT_OK(InitDecompressor()); }
+    if (output_length == 0) {
+      // The zlib library does not allow *output to be NULL, even when output_length
+      // is 0 (inflate() will return Z_STREAM_ERROR). We don't consider this an
+      // error, so bail early if no output is expected. Note that we don't signal
+      // an error if the input actually contains compressed data.
+      return Status::OK();
+    }
+
+    // Reset the stream for this block
+    if (inflateReset(&stream_) != Z_OK) {
+      return ZlibError("zlib inflateReset failed: ");
+    }
+
+    // gzip can run in streaming mode or non-streaming mode.  We only
+    // support the non-streaming use case where we present it the entire
+    // compressed input and a buffer big enough to contain the entire
+    // decompressed output, so inflate() is called exactly once.
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_length);
+
+    // We know the output size.  In this case, we can use Z_FINISH
+    // which is more efficient.
+    const int ret = inflate(&stream_, Z_FINISH);
+    if (ret == Z_STREAM_END) { return Status::OK(); }
+
+    if (ret == Z_OK) {
+      // Failure, buffer was too small
+      std::stringstream ss;
+      ss << "Too small a buffer passed to GZipCodec. InputLength=" << input_length
+         << " OutputLength=" << output_length;
+      return Status::IOError(ss.str());
+    }
+
+    // Failure for some other reason
+    return ZlibError("GZipCodec failed: ");
+  }
+
+  // Upper bound on the compressed size of `input_length` bytes, as reported
+  // by deflateBound(). Switches the codec to compression mode if needed.
+  int64_t MaxCompressedLen(int64_t input_length, const uint8_t* input) {
+    // Must be in compression mode
+    if (!compressor_initialized_) {
+      Status s = InitCompressor();
+      DCHECK(s.ok());
+    }
+    // TODO(wesm): deal with zlib < 1.2.3 (see Impala codebase)
+    return deflateBound(&stream_, static_cast<uLong>(input_length));
+  }
+
+  // Compresses the whole of `input` into `output` in one shot and stores the
+  // number of bytes written in *output_length. `output_buffer_len` is the
+  // capacity of `output`. Returns IOError if zlib cannot finish the stream,
+  // e.g. because the output buffer is too small.
+  Status Compress(int64_t input_length, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output, int64_t* output_length) {
+    if (!compressor_initialized_) { RETURN_NOT_OK(InitCompressor()); }
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_buffer_len);
+
+    const int ret = deflate(&stream_, Z_FINISH);
+    if (ret != Z_STREAM_END) {
+      Status failure = Status::OK();
+      if (ret == Z_OK) {
+        // will return Z_OK (and stream.msg NOT set) if stream.avail_out is too
+        // small
+        failure = Status::IOError("zlib deflate failed, output buffer too small");
+      } else {
+        failure = ZlibError("zlib deflate failed: ");
+      }
+      // Drop the half-written block so the next Compress() starts from a
+      // clean stream instead of continuing this one. The error text has
+      // already been captured, since the reset clears stream_.msg.
+      (void)deflateReset(&stream_);
+      return failure;
+    }
+
+    if (deflateReset(&stream_) != Z_OK) {
+      return ZlibError("zlib deflateReset failed: ");
+    }
+
+    // Actual output length
+    *output_length = output_buffer_len - stream_.avail_out;
+    return Status::OK();
+  }
+
+ private:
+  // Builds an IOError from `prefix` followed by zlib's message for the
+  // stream. zlib frequently leaves msg as NULL, in which case only the
+  // prefix is reported.
+  Status ZlibError(const char* prefix) const {
+    std::stringstream ss;
+    ss << prefix;
+    if (stream_.msg != nullptr) { ss << stream_.msg; }
+    return Status::IOError(ss.str());
+  }
+
+  // zlib is stateful and the z_stream state variable must be initialized
+  // before use
+  z_stream stream_;
+
+  // Realistically, this will always be GZIP, but we leave the option open to
+  // configure
+  GZipCodec::Format format_;
+
+  // These variables are mutually exclusive. When the codec is in "compressor"
+  // state, compressor_initialized_ is true while decompressor_initialized_ is
+  // false. When it's decompressing, the opposite is true.
+  //
+  // Indeed, this is slightly hacky, but the alternative is having separate
+  // Compressor and Decompressor classes. If this ever becomes an issue, we can
+  // perform the refactoring then
+  bool compressor_initialized_;
+  bool decompressor_initialized_;
+};
+
+// GZipCodec is a thin shell around GZipCodecImpl: the constructor creates the
+// implementation object and every operation below forwards to it unchanged.
+GZipCodec::GZipCodec(Format format) : impl_(new GZipCodecImpl(format)) {}
+
+GZipCodec::~GZipCodec() {}
+
+// Forwards to GZipCodecImpl::Decompress
+Status GZipCodec::Decompress(int64_t input_length, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output) {
+  GZipCodecImpl* const impl = impl_.get();
+  return impl->Decompress(input_length, input, output_buffer_len, output);
+}
+
+// Forwards to GZipCodecImpl::MaxCompressedLen
+int64_t GZipCodec::MaxCompressedLen(int64_t input_length, const uint8_t* input) {
+  GZipCodecImpl* const impl = impl_.get();
+  return impl->MaxCompressedLen(input_length, input);
+}
+
+// Forwards to GZipCodecImpl::Compress
+Status GZipCodec::Compress(int64_t input_length, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output, int64_t* output_length) {
+  GZipCodecImpl* const impl = impl_.get();
+  return impl->Compress(input_length, input, output_buffer_len, output, output_length);
+}
+
+// Codec name reported for this implementation
+const char* GZipCodec::name() const {
+  return "gzip";
+}
+
+// ----------------------------------------------------------------------
+// Snappy implementation
+
+// Decompresses a complete Snappy block into `output_buffer`, whose capacity
+// is `output_len` bytes.
+//
+// The uncompressed size recorded in the block header is checked against
+// `output_len` before anything is written, so a short buffer yields Invalid
+// instead of a write past the end of `output_buffer`. Returns IOError when
+// the header cannot be parsed or the payload is corrupt.
+Status SnappyCodec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  const char* compressed = reinterpret_cast<const char*>(input);
+  const size_t compressed_size = static_cast<size_t>(input_len);
+
+  size_t decompressed_size = 0;
+  if (!snappy::GetUncompressedLength(compressed, compressed_size, &decompressed_size)) {
+    return Status::IOError("Corrupt snappy compressed data.");
+  }
+  if (output_len < 0 || static_cast<size_t>(output_len) < decompressed_size) {
+    std::stringstream ss;
+    ss << "Output buffer size (" << output_len << ") must be " << decompressed_size
+       << " or larger.";
+    return Status::Invalid(ss.str());
+  }
+
+  if (!snappy::RawUncompress(
+          compressed, compressed_size, reinterpret_cast<char*>(output_buffer))) {
+    return Status::IOError("Corrupt snappy compressed data.");
+  }
+  return Status::OK();
+}
+
+// Upper bound on the compressed size of `input_len` bytes, as reported by
+// snappy::MaxCompressedLength(). `input` is not inspected.
+int64_t SnappyCodec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  return snappy::MaxCompressedLength(input_len);
+}
+
+// Compresses `input` into `output_buffer` and stores the number of bytes
+// written in *output_length.
+//
+// `output_buffer_len` is not consulted here: the caller must provide at
+// least MaxCompressedLen(input_len, input) bytes.
+Status SnappyCodec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  size_t output_len;
+  snappy::RawCompress(reinterpret_cast<const char*>(input),
+      static_cast<size_t>(input_len), reinterpret_cast<char*>(output_buffer),
+      &output_len);
+  *output_length = static_cast<int64_t>(output_len);
+  return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Brotli implementation
+
+// One-shot Brotli decompression of `input` into `output_buffer`, whose
+// capacity is `output_len` bytes. Any decoder result other than success is
+// reported as IOError.
+Status BrotliCodec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  size_t decoded_size = output_len;
+  const auto decode_result =
+      BrotliDecoderDecompress(input_len, input, &decoded_size, output_buffer);
+  if (decode_result == BROTLI_DECODER_RESULT_SUCCESS) { return Status::OK(); }
+  return Status::IOError("Corrupt brotli compressed data.");
+}
+
+// Upper bound on the compressed size of `input_len` bytes, as reported by
+// BrotliEncoderMaxCompressedSize(). `input` is not inspected.
+int64_t BrotliCodec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  return BrotliEncoderMaxCompressedSize(input_len);
+}
+
+// One-shot Brotli compression of `input` into `output_buffer`, whose capacity
+// is `output_buffer_len` bytes. On success the number of bytes written is
+// stored in *output_length; an encoder failure is reported as IOError.
+Status BrotliCodec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  // In: capacity of output_buffer. Out: bytes actually produced.
+  size_t encoded_size = output_buffer_len;
+  // TODO: Make quality configurable. We use 8 as a default as it is the best
+  //       trade-off for Parquet workload
+  const auto succeeded = BrotliEncoderCompress(8, BROTLI_DEFAULT_WINDOW,
+      BROTLI_DEFAULT_MODE, input_len, input, &encoded_size, output_buffer);
+  if (succeeded == BROTLI_FALSE) {
+    return Status::IOError("Brotli compression failure.");
+  }
+  *output_length = encoded_size;
+  return Status::OK();
+}
+
+}  // namespace arrow


Mime
View raw message