arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jacq...@apache.org
Subject [17/17] arrow git commit: ARROW-4: This provides an partial C++11 implementation of the Apache Arrow data structures along with a cmake-based build system. The codebase generally follows Google C++ style guide, but more cleaning to be more conforming is
Date Wed, 17 Feb 2016 12:39:52 GMT
ARROW-4: This provides a partial C++11 implementation of the Apache Arrow data structures along with a cmake-based build system. The codebase generally follows the Google C++ style guide, but more cleanup is needed to conform fully. It uses googletest for unit testing.

Feature-wise, this patch includes:

* A small logical data type object model
* Immutable array accessor containers for fixed-width primitive and list types
* A String array container implemented as a List<byte>
* Builder classes for the primitive arrays and list types
* A simple memory management model using mutable and immutable buffers and
  C++ RAII idioms
* Modest unit test coverage for the above features.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/23c4b08d
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/23c4b08d
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/23c4b08d

Branch: refs/heads/master
Commit: 23c4b08d154f8079806a1f0258d7e4af17bdf5fd
Parents: 16e44e3
Author: Wes McKinney <wes@cloudera.com>
Authored: Tue Feb 16 17:56:05 2016 -0800
Committer: Jacques Nadeau <jacques@apache.org>
Committed: Wed Feb 17 04:39:03 2016 -0800

----------------------------------------------------------------------
 cpp/.gitignore                            |   21 +
 cpp/CMakeLists.txt                        |  483 ++
 cpp/LICENSE.txt                           |  202 +
 cpp/README.md                             |   48 +
 cpp/build-support/asan_symbolize.py       |  360 ++
 cpp/build-support/bootstrap_toolchain.py  |  114 +
 cpp/build-support/cpplint.py              | 6323 ++++++++++++++++++++++++
 cpp/build-support/run-test.sh             |  195 +
 cpp/build-support/stacktrace_addr2line.pl |   92 +
 cpp/cmake_modules/CompilerInfo.cmake      |   46 +
 cpp/cmake_modules/FindGPerf.cmake         |   69 +
 cpp/cmake_modules/FindGTest.cmake         |   91 +
 cpp/cmake_modules/FindParquet.cmake       |   80 +
 cpp/cmake_modules/san-config.cmake        |   92 +
 cpp/setup_build_env.sh                    |   12 +
 cpp/src/arrow/CMakeLists.txt              |   33 +
 cpp/src/arrow/api.h                       |   21 +
 cpp/src/arrow/array-test.cc               |   92 +
 cpp/src/arrow/array.cc                    |   44 +
 cpp/src/arrow/array.h                     |   79 +
 cpp/src/arrow/builder.cc                  |   63 +
 cpp/src/arrow/builder.h                   |  101 +
 cpp/src/arrow/field-test.cc               |   38 +
 cpp/src/arrow/field.h                     |   48 +
 cpp/src/arrow/parquet/CMakeLists.txt      |   35 +
 cpp/src/arrow/test-util.h                 |   97 +
 cpp/src/arrow/type.cc                     |   22 +
 cpp/src/arrow/type.h                      |  180 +
 cpp/src/arrow/types/CMakeLists.txt        |   63 +
 cpp/src/arrow/types/binary.h              |   33 +
 cpp/src/arrow/types/boolean.h             |   35 +
 cpp/src/arrow/types/collection.h          |   45 +
 cpp/src/arrow/types/construct.cc          |   88 +
 cpp/src/arrow/types/construct.h           |   32 +
 cpp/src/arrow/types/datetime.h            |   79 +
 cpp/src/arrow/types/decimal.h             |   32 +
 cpp/src/arrow/types/floating.cc           |   22 +
 cpp/src/arrow/types/floating.h            |   43 +
 cpp/src/arrow/types/integer.cc            |   22 +
 cpp/src/arrow/types/integer.h             |   88 +
 cpp/src/arrow/types/json.cc               |   42 +
 cpp/src/arrow/types/json.h                |   38 +
 cpp/src/arrow/types/list-test.cc          |  166 +
 cpp/src/arrow/types/list.cc               |   31 +
 cpp/src/arrow/types/list.h                |  206 +
 cpp/src/arrow/types/null.h                |   34 +
 cpp/src/arrow/types/primitive-test.cc     |  345 ++
 cpp/src/arrow/types/primitive.cc          |   50 +
 cpp/src/arrow/types/primitive.h           |  240 +
 cpp/src/arrow/types/string-test.cc        |  242 +
 cpp/src/arrow/types/string.cc             |   40 +
 cpp/src/arrow/types/string.h              |  181 +
 cpp/src/arrow/types/struct-test.cc        |   61 +
 cpp/src/arrow/types/struct.cc             |   38 +
 cpp/src/arrow/types/struct.h              |   51 +
 cpp/src/arrow/types/test-common.h         |   50 +
 cpp/src/arrow/types/union.cc              |   49 +
 cpp/src/arrow/types/union.h               |   86 +
 cpp/src/arrow/util/CMakeLists.txt         |   81 +
 cpp/src/arrow/util/bit-util-test.cc       |   44 +
 cpp/src/arrow/util/bit-util.cc            |   46 +
 cpp/src/arrow/util/bit-util.h             |   68 +
 cpp/src/arrow/util/buffer-test.cc         |   58 +
 cpp/src/arrow/util/buffer.cc              |   53 +
 cpp/src/arrow/util/buffer.h               |  133 +
 cpp/src/arrow/util/macros.h               |   26 +
 cpp/src/arrow/util/random.h               |  128 +
 cpp/src/arrow/util/status.cc              |   38 +
 cpp/src/arrow/util/status.h               |  152 +
 cpp/src/arrow/util/test_main.cc           |   26 +
 cpp/thirdparty/build_thirdparty.sh        |   62 +
 cpp/thirdparty/download_thirdparty.sh     |   20 +
 cpp/thirdparty/versions.sh                |    3 +
 73 files changed, 12551 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/.gitignore
----------------------------------------------------------------------
diff --git a/cpp/.gitignore b/cpp/.gitignore
new file mode 100644
index 0000000..ab30247
--- /dev/null
+++ b/cpp/.gitignore
@@ -0,0 +1,21 @@
+thirdparty/
+CMakeFiles/
+CMakeCache.txt
+CTestTestfile.cmake
+Makefile
+cmake_install.cmake
+build/
+Testing/
+
+#########################################
+# Editor temporary/working/backup files #
+.#*
+*\#*\#
+[#]*#
+*~
+*$
+*.bak
+*flymake*
+*.kdev4
+*.log
+*.swp

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
new file mode 100644
index 0000000..90e55df
--- /dev/null
+++ b/cpp/CMakeLists.txt
@@ -0,0 +1,483 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+cmake_minimum_required(VERSION 2.7)
+project(arrow)
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
+
+include(CMakeParseArguments)
+
+set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
+set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
+
+# Allow "make install" to not depend on all targets.
+#
+# Must be declared in the top-level CMakeLists.txt.
+set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true)
+
+# Generate a Clang compile_commands.json "compilation database" file for use
+# with various development tools, such as Vim's YouCompleteMe plugin.
+# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
+if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
+  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
+endif()
+
+# Enable using a custom GCC toolchain to build Arrow
+if (NOT "$ENV{ARROW_GCC_ROOT}" STREQUAL "")
+  set(GCC_ROOT $ENV{ARROW_GCC_ROOT})
+  set(CMAKE_C_COMPILER ${GCC_ROOT}/bin/gcc)
+  set(CMAKE_CXX_COMPILER ${GCC_ROOT}/bin/g++)
+endif()
+
+# ----------------------------------------------------------------------
+# cmake options
+
+# Top level cmake dir
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
+  option(ARROW_WITH_PARQUET
+    "Build the Parquet adapter and link to libparquet"
+    OFF)
+
+  option(ARROW_BUILD_TESTS
+    "Build the Arrow googletest unit tests"
+    ON)
+endif()
+
+if(NOT ARROW_BUILD_TESTS)
+  set(NO_TESTS 1)
+endif()
+
+
+############################################################
+# Compiler flags
+############################################################
+
+# compiler flags that are common across debug/release builds
+#  - msse3: Enable SSE3 compiler intrinsics.
+#  - Wall: Enable all warnings.
+#  - Wno-sign-compare: suppress warnings for comparison between signed and unsigned
+#    integers (documented here, but not currently part of CXX_COMMON_FLAGS)
+#  -Wno-deprecated: some of the gutil code includes old things like ext/hash_set, ignore that
+#  - pthread: enable multithreaded malloc
+#  - -D__STDC_FORMAT_MACROS: for PRI* print format macros
+#  -fno-strict-aliasing
+#     Assume programs do not follow strict aliasing rules.
+#     GCC cannot always verify whether strict aliasing rules are indeed followed due to
+#     fundamental limitations in escape analysis, which can result in subtle bad code generation.
+#     This has a small perf hit but worth it to avoid hard to debug crashes.
+set(CXX_COMMON_FLAGS "-std=c++11 -fno-strict-aliasing -msse3 -Wall -Wno-deprecated -pthread -D__STDC_FORMAT_MACROS")
+
+# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE=<type> .')
+# For all builds:
+# For CMAKE_BUILD_TYPE=Debug
+#   -ggdb: Enable gdb debugging
+# For CMAKE_BUILD_TYPE=FastDebug
+#   Same as DEBUG, except with some optimizations on.
+# For CMAKE_BUILD_TYPE=Release
+#   -O3: Enable all compiler optimizations
+#   -g: Enable symbols for profiler tools (TODO: remove for shipping)
+set(CXX_FLAGS_DEBUG "-ggdb")
+set(CXX_FLAGS_FASTDEBUG "-ggdb -O1")
+set(CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG")
+
+set(CXX_FLAGS_PROFILE_GEN "${CXX_FLAGS_RELEASE} -fprofile-generate")
+set(CXX_FLAGS_PROFILE_BUILD "${CXX_FLAGS_RELEASE} -fprofile-use")
+
+# If no build type is specified, default to debug builds
+if (NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug)
+endif(NOT CMAKE_BUILD_TYPE)
+
+string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
+
+
+# Set compile flags based on the build type.
+message("Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})")
+if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_DEBUG})
+elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_FASTDEBUG})
+elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_RELEASE})
+elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "PROFILE_GEN")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_PROFILE_GEN})
+elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "PROFILE_BUILD")
+  set(CMAKE_CXX_FLAGS ${CXX_FLAGS_PROFILE_BUILD})
+else()
+  message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}")
+endif ()
+
+# Add common flags
+set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
+
+# Required to avoid static linking errors with dependencies
+add_definitions(-fPIC)
+
+# Determine compiler version
+include(CompilerInfo)
+
+if ("${COMPILER_FAMILY}" STREQUAL "clang")
+  # Clang helpfully provides a few extensions from C++11 such as the 'override'
+  # keyword on methods. This doesn't change behavior, and we selectively enable
+  # it in src/gutil/port.h only on clang. So, we can safely use it, and don't want
+  # to trigger warnings when we do so.
+  # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-extensions")
+
+  # Using Clang with ccache causes a bunch of spurious warnings that are
+  # purportedly fixed in the next version of ccache. See the following for details:
+  #
+  #   http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html
+  #   http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")
+
+  # Only hardcode -fcolor-diagnostics if stderr is opened on a terminal. Otherwise
+  # the color codes show up as noisy artifacts.
+  #
+  # This test is imperfect because 'cmake' and 'make' can be run independently
+  # (with different terminal options), and we're testing during the former.
+  execute_process(COMMAND test -t 2 RESULT_VARIABLE ARROW_IS_TTY)
+  if ((${ARROW_IS_TTY} EQUAL 0) AND (NOT ("$ENV{TERM}" STREQUAL "dumb")))
+    message("Running in a controlling terminal")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
+  else()
+    message("Running without a controlling terminal or in a dumb terminal")
+  endif()
+
+  # Use libstdc++ and not libc++. The latter lacks support for tr1 on OS X,
+  # even though it has been the default standard library since 10.9.
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++")
+endif()
+
+# Sanity check linking option.
+if (NOT ARROW_LINK)
+  set(ARROW_LINK "d")
+elseif(NOT ("auto" MATCHES "^${ARROW_LINK}" OR
+            "dynamic" MATCHES "^${ARROW_LINK}" OR
+            "static" MATCHES "^${ARROW_LINK}"))
+  message(FATAL_ERROR "Unknown value for ARROW_LINK, must be auto|dynamic|static")
+else()
+  # Remove all but the first letter.
+  string(SUBSTRING "${ARROW_LINK}" 0 1 ARROW_LINK)
+endif()
+
+# ASAN / TSAN / UBSAN
+include(san-config)
+
+# For any C code, use the same flags.
+set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")
+
+# Code coverage
+if ("${ARROW_GENERATE_COVERAGE}")
+  if("${CMAKE_CXX_COMPILER}" MATCHES ".*clang.*")
+    # There appear to be some bugs in clang 3.3 which cause code coverage
+    # to have link errors, not locating the llvm_gcda_* symbols.
+    # This should be fixed in llvm 3.4 with http://llvm.org/viewvc/llvm-project?view=revision&revision=184666
+    message(SEND_ERROR "Cannot currently generate coverage with clang")
+  endif()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage -DCOVERAGE_BUILD")
+
+  # For coverage to work properly, we need to use static linkage. Otherwise,
+  # __gcov_flush() doesn't properly flush coverage from every module.
+  # See http://stackoverflow.com/questions/28164543/using-gcov-flush-within-a-library-doesnt-force-the-other-modules-to-yield-gc
+  if("${ARROW_LINK}" STREQUAL "a")
+    message("Using static linking for coverage build")
+    set(ARROW_LINK "s")
+  elseif("${ARROW_LINK}" STREQUAL "d")
+    message(SEND_ERROR "Cannot use coverage with dynamic linking")
+  endif()
+endif()
+
+# If we still don't know what kind of linking to perform, choose based on
+# build type (developers like fast builds).
+if ("${ARROW_LINK}" STREQUAL "a")
+  if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG" OR
+      "${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG")
+    message("Using dynamic linking for ${CMAKE_BUILD_TYPE} builds")
+    set(ARROW_LINK "d")
+  else()
+    message("Using static linking for ${CMAKE_BUILD_TYPE} builds")
+    set(ARROW_LINK "s")
+  endif()
+endif()
+
+# Are we using the gold linker? It doesn't work with dynamic linking as
+# weak symbols aren't properly overridden, causing tcmalloc to be omitted.
+# Let's flag this as an error in RELEASE builds (we shouldn't release a
+# product like this).
+#
+# See https://sourceware.org/bugzilla/show_bug.cgi?id=16979 for details.
+#
+# The gold linker is only for ELF binaries, which OSX doesn't use. We can
+# just skip.
+if (NOT APPLE)
+  execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Wl,--version OUTPUT_VARIABLE LINKER_OUTPUT)
+endif ()
+if (LINKER_OUTPUT MATCHES "gold")
+  if ("${ARROW_LINK}" STREQUAL "d" AND
+      "${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE")
+    message(SEND_ERROR "Cannot use gold with dynamic linking in a RELEASE build "
+      "as it would cause tcmalloc symbols to get dropped")
+  else()
+    message("Using gold linker")
+  endif()
+  set(ARROW_USING_GOLD 1)
+else()
+  message("Using ld linker")
+endif()
+
+# Having set ARROW_LINK due to build type and/or sanitizer, it's now safe to
+# act on its value.
+if ("${ARROW_LINK}" STREQUAL "d")
+  set(BUILD_SHARED_LIBS ON)
+
+  # Position independent code is only necessary when producing shared objects.
+  add_definitions(-fPIC)
+endif()
+
+# set compile output directory
+string (TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME)
+
+# If building in-source, create the latest symlink. If building out-of-source, which is
+# preferred, simply output the binaries in the build folder
+if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}/")
+  # Link build/latest to the current build directory, to avoid developers
+  # accidentally running the latest debug build when in fact they're building
+  # release builds.
+  FILE(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY})
+  if (NOT APPLE)
+    set(MORE_ARGS "-T")
+  endif()
+EXECUTE_PROCESS(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
+  ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
+else()
+  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}/")
+endif()
+
+# where to put generated archives (.a files)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# where to put generated libraries (.so files)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+
+# where to put generated binaries
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+include_directories(src)
+
+############################################################
+# Visibility
+############################################################
+# For generate_export_header() and add_compiler_export_flags().
+include(GenerateExportHeader)
+
+############################################################
+# Testing
+############################################################
+
+# Add a new test case, with or without an executable that should be built.
+#
+# REL_TEST_NAME is the name of the test. It may be a single component
+# (e.g. monotime-test) or contain additional components (e.g.
+# net/net_util-test). Either way, the last component must be a globally
+# unique name.
+#
+# Arguments after the test name will be passed to set_tests_properties().
+function(ADD_ARROW_TEST REL_TEST_NAME)
+  if(NO_TESTS)  # NO_TESTS is set earlier in this file when ARROW_BUILD_TESTS is off
+    return()
+  endif()
+  get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)  # strip directory and extension
+
+  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}.cc)
+    # This test has a corresponding .cc file, set it up as an executable.
+    set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")  # where the built binary will land
+    add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")
+    target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS})
+  else()
+    # No executable, just invoke the test (probably a script) directly.
+    set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
+  endif()
+
+  add_test(${TEST_NAME}  # register with CTest; the test is launched through run-test.sh
+    ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
+  if(ARGN)  # any extra arguments are forwarded verbatim as test properties
+    set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
+  endif()
+endfunction()
+
+# A wrapper for add_dependencies() that is compatible with NO_TESTS.
+function(ADD_ARROW_TEST_DEPENDENCIES REL_TEST_NAME)
+  if(NO_TESTS)  # ADD_ARROW_TEST creates no targets when NO_TESTS is set, so do nothing
+    return()
+  endif()
+  get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)  # same name derivation as ADD_ARROW_TEST
+
+  add_dependencies(${TEST_NAME} ${ARGN})  # ARGN: targets that must be built before the test target
+endfunction()
+
+enable_testing()
+
+############################################################
+# Dependencies
+############################################################
+function(ADD_THIRDPARTY_LIB LIB_NAME)  # declare an IMPORTED library target named LIB_NAME
+  set(options)  # no boolean flags are accepted
+  set(one_value_args SHARED_LIB STATIC_LIB)  # each is used below as the target's IMPORTED_LOCATION
+  set(multi_value_args DEPS)  # libraries recorded as the target's link interface
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  if(("${ARROW_LINK}" STREQUAL "s" AND ARG_STATIC_LIB) OR (NOT ARG_SHARED_LIB))  # static requested and available, or no shared lib given
+    if(NOT ARG_STATIC_LIB)  # reachable only when neither STATIC_LIB nor SHARED_LIB was supplied
+      message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
+    endif()
+    add_library(${LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
+  else()  # a shared library was given and static linking was not both requested and possible
+    add_library(${LIB_NAME} SHARED IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
+  endif()
+
+  if(ARG_DEPS)  # attach the listed dependencies to the imported target
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+  endif()
+endfunction()
+
+## GTest
+if ("$ENV{GTEST_HOME}" STREQUAL "")
+  set(GTest_HOME ${THIRDPARTY_DIR}/googletest-release-1.7.0)
+endif()
+find_package(GTest REQUIRED)
+include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(gtest
+  STATIC_LIB ${GTEST_STATIC_LIB})
+
+## Google PerfTools
+##
+## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
+## near definition of ARROW_USING_GOLD).
+# find_package(GPerf REQUIRED)
+# if (NOT "${ARROW_USE_ASAN}" AND
+#     NOT "${ARROW_USE_TSAN}" AND
+#     NOT ("${ARROW_USING_GOLD}" AND "${ARROW_LINK}" STREQUAL "d"))
+#   ADD_THIRDPARTY_LIB(tcmalloc
+#     STATIC_LIB "${TCMALLOC_STATIC_LIB}"
+#     SHARED_LIB "${TCMALLOC_SHARED_LIB}")
+#   ADD_THIRDPARTY_LIB(profiler
+#     STATIC_LIB "${PROFILER_STATIC_LIB}"
+#     SHARED_LIB "${PROFILER_SHARED_LIB}")
+#   list(APPEND ARROW_BASE_LIBS tcmalloc profiler)
+#   add_definitions("-DTCMALLOC_ENABLED")
+#   set(ARROW_TCMALLOC_AVAILABLE 1)
+# endif()
+
+############################################################
+# Linker setup
+############################################################
+set(ARROW_MIN_TEST_LIBS arrow arrow_test_main arrow_test_util ${ARROW_BASE_LIBS})
+set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
+
+############################################################
+# "make ctags" target
+############################################################
+if (UNIX)
+  add_custom_target(ctags ctags -R --languages=c++,c)
+endif (UNIX)
+
+############################################################
+# "make etags" target
+############################################################
+if (UNIX)
+  add_custom_target(tags etags --members --declarations
+  `find ${CMAKE_CURRENT_SOURCE_DIR}/src
+   -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or -name \\*.h -or -name \\*.c -or
+   -name \\*.f`)
+  add_custom_target(etags DEPENDS tags)
+endif (UNIX)
+
+############################################################
+# "make cscope" target
+############################################################
+if (UNIX)
+  add_custom_target(cscope find ${CMAKE_CURRENT_SOURCE_DIR}
+  ( -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or
+    -name \\*.h -or -name \\*.c -or -name \\*.f )
+  -exec echo \"{}\" \; > cscope.files && cscope -q -b VERBATIM)
+endif (UNIX)
+
+############################################################
+# "make lint" target
+############################################################
+if (UNIX)
+  # Full lint
+  add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
+  --verbose=2
+  --linelength=90
+  --filter=-whitespace/comments,-readability/todo,-build/header_guard
+    `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
+endif (UNIX)
+
+#----------------------------------------------------------------------
+# Parquet adapter
+
+if(ARROW_WITH_PARQUET)
+  find_package(Parquet REQUIRED)
+  include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(parquet
+    STATIC_LIB ${PARQUET_STATIC_LIB}
+    SHARED_LIB ${PARQUET_SHARED_LIB})
+
+  add_subdirectory(src/arrow/parquet)
+  list(APPEND LINK_LIBS arrow_parquet parquet)
+endif()
+
+############################################################
+# Subdirectories
+############################################################
+
+add_subdirectory(src/arrow)
+add_subdirectory(src/arrow/util)
+add_subdirectory(src/arrow/types)
+
+list(APPEND LINK_LIBS  # append, not set(): keeps arrow_parquet/parquet added when ARROW_WITH_PARQUET is ON
+  arrow_util
+  arrow_types)
+
+set(ARROW_SRCS
+  src/arrow/array.cc
+  src/arrow/builder.cc
+  src/arrow/type.cc
+)
+
+add_library(arrow SHARED
+  ${ARROW_SRCS}
+)
+target_link_libraries(arrow ${LINK_LIBS})
+set_target_properties(arrow PROPERTIES LINKER_LANGUAGE CXX)
+
+install(TARGETS arrow
+  LIBRARY DESTINATION lib)

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/LICENSE.txt
----------------------------------------------------------------------
diff --git a/cpp/LICENSE.txt b/cpp/LICENSE.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/cpp/LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/README.md
----------------------------------------------------------------------
diff --git a/cpp/README.md b/cpp/README.md
new file mode 100644
index 0000000..378dc4e
--- /dev/null
+++ b/cpp/README.md
@@ -0,0 +1,48 @@
+# Arrow C++
+
+## Setup Build Environment
+
+Arrow uses CMake as a build configuration system. It currently supports both in-source
+and out-of-source builds; out-of-source builds are preferred.
+
+Arrow requires a C++11-enabled compiler. On Linux, gcc 4.8 and higher should be
+sufficient.
+
+To build the thirdparty build dependencies, run:
+
+```
+./thirdparty/download_thirdparty.sh
+./thirdparty/build_thirdparty.sh
+```
+
+You can also run the following from the root of the C++ tree:
+
+```
+source setup_build_env.sh
+```
+
+Arrow is configured to use the `thirdparty` directory by default for its build
+dependencies. To set up a custom toolchain see below.
+
+Simple debug build:
+
+    mkdir debug
+    cd debug
+    cmake ..
+    make
+    ctest
+
+Simple release build:
+
+    mkdir release
+    cd release
+    cmake .. -DCMAKE_BUILD_TYPE=Release
+    make
+    ctest
+
+### Third-party environment variables
+
+To set up your own specific build toolchain, set the relevant environment
+variables:
+
+* Googletest: `GTEST_HOME` (only required to build the unit tests)

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/build-support/asan_symbolize.py
----------------------------------------------------------------------
diff --git a/cpp/build-support/asan_symbolize.py b/cpp/build-support/asan_symbolize.py
new file mode 100755
index 0000000..839a198
--- /dev/null
+++ b/cpp/build-support/asan_symbolize.py
@@ -0,0 +1,360 @@
+#!/usr/bin/env python
+#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+import bisect
+import os
+import re
+import subprocess
+import sys
+
+llvm_symbolizer = None
+symbolizers = {}
+filetypes = {}
+vmaddrs = {}
+DEBUG = False
+
+
+# FIXME: merge the code that calls fix_filename().
+def fix_filename(file_name):
+  for path_to_cut in sys.argv[1:]:
+    file_name = re.sub('.*' + path_to_cut, '', file_name)
+  file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
+  file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
+  return file_name
+
+
+class Symbolizer(object):
+  def __init__(self):
+    pass
+
+  def symbolize(self, addr, binary, offset):
+    """Symbolize the given address (pair of binary and offset).
+
+    Overridden in subclasses.
+    Args:
+        addr: virtual address of an instruction.
+        binary: path to executable/shared object containing this instruction.
+        offset: instruction offset in the @binary.
+    Returns:
+        list of strings (one string for each inlined frame) describing
+        the code locations for this instruction (that is, function name, file
+        name, line and column numbers).
+    """
+    return None
+
+
+class LLVMSymbolizer(Symbolizer):
+  def __init__(self, symbolizer_path):
+    super(LLVMSymbolizer, self).__init__()
+    self.symbolizer_path = symbolizer_path
+    self.pipe = self.open_llvm_symbolizer()
+
+  def open_llvm_symbolizer(self):
+    if not os.path.exists(self.symbolizer_path):
+      return None
+    cmd = [self.symbolizer_path,
+           '--use-symbol-table=true',
+           '--demangle=false',
+           '--functions=true',
+           '--inlining=true']
+    if DEBUG:
+      print ' '.join(cmd)
+    return subprocess.Popen(cmd, stdin=subprocess.PIPE,
+                            stdout=subprocess.PIPE)
+
+  def symbolize(self, addr, binary, offset):
+    """Overrides Symbolizer.symbolize."""
+    if not self.pipe:
+      return None
+    result = []
+    try:
+      symbolizer_input = '%s %s' % (binary, offset)
+      if DEBUG:
+        print symbolizer_input
+      print >> self.pipe.stdin, symbolizer_input
+      while True:
+        function_name = self.pipe.stdout.readline().rstrip()
+        if not function_name:
+          break
+        file_name = self.pipe.stdout.readline().rstrip()
+        file_name = fix_filename(file_name)
+        if (not function_name.startswith('??') and
+            not file_name.startswith('??')):
+          # Append only valid frames.
+          result.append('%s in %s %s' % (addr, function_name,
+                                         file_name))
+    except Exception:
+      result = []
+    if not result:
+      result = None
+    return result
+
+
+def LLVMSymbolizerFactory(system):
+  symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
+  if not symbolizer_path:
+    # Assume llvm-symbolizer is in PATH.
+    symbolizer_path = 'llvm-symbolizer'
+  return LLVMSymbolizer(symbolizer_path)
+
+
+class Addr2LineSymbolizer(Symbolizer):
+  def __init__(self, binary):
+    super(Addr2LineSymbolizer, self).__init__()
+    self.binary = binary
+    self.pipe = self.open_addr2line()
+
+  def open_addr2line(self):
+    cmd = ['addr2line', '-f', '-e', self.binary]
+    if DEBUG:
+      print ' '.join(cmd)
+    return subprocess.Popen(cmd,
+                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+  def symbolize(self, addr, binary, offset):
+    """Overrides Symbolizer.symbolize."""
+    if self.binary != binary:
+      return None
+    try:
+      print >> self.pipe.stdin, offset
+      function_name = self.pipe.stdout.readline().rstrip()
+      file_name = self.pipe.stdout.readline().rstrip()
+    except Exception:
+      function_name = ''
+      file_name = ''
+    file_name = fix_filename(file_name)
+    return ['%s in %s %s' % (addr, function_name, file_name)]
+
+
+class DarwinSymbolizer(Symbolizer):
+  def __init__(self, addr, binary):
+    super(DarwinSymbolizer, self).__init__()
+    self.binary = binary
+    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
+    if len(addr) > 10:
+      self.arch = 'x86_64'
+    else:
+      self.arch = 'i386'
+    self.vmaddr = None
+    self.pipe = None
+
+  def write_addr_to_pipe(self, offset):
+    print >> self.pipe.stdin, '0x%x' % int(offset, 16)
+
+  def open_atos(self):
+    if DEBUG:
+      print 'atos -o %s -arch %s' % (self.binary, self.arch)
+    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
+    self.pipe = subprocess.Popen(cmdline,
+                                 stdin=subprocess.PIPE,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE)
+
+  def symbolize(self, addr, binary, offset):
+    """Overrides Symbolizer.symbolize."""
+    if self.binary != binary:
+      return None
+    self.open_atos()
+    self.write_addr_to_pipe(offset)
+    self.pipe.stdin.close()
+    atos_line = self.pipe.stdout.readline().rstrip()
+    # A well-formed atos response looks like this:
+    #   foo(type1, type2) (in object.name) (filename.cc:80)
+    match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
+    if DEBUG:
+      print 'atos_line: ', atos_line
+    if match:
+      function_name = match.group(1)
+      function_name = re.sub('\(.*?\)', '', function_name)
+      file_name = fix_filename(match.group(3))
+      return ['%s in %s %s' % (addr, function_name, file_name)]
+    else:
+      return ['%s in %s' % (addr, atos_line)]
+
+
+# Chain several symbolizers so that if one symbolizer fails, we fall back
+# to the next symbolizer in chain.
+class ChainSymbolizer(Symbolizer):
+  def __init__(self, symbolizer_list):
+    super(ChainSymbolizer, self).__init__()
+    self.symbolizer_list = symbolizer_list
+
+  def symbolize(self, addr, binary, offset):
+    """Overrides Symbolizer.symbolize."""
+    for symbolizer in self.symbolizer_list:
+      if symbolizer:
+        result = symbolizer.symbolize(addr, binary, offset)
+        if result:
+          return result
+    return None
+
+  def append_symbolizer(self, symbolizer):
+    self.symbolizer_list.append(symbolizer)
+
+
+def BreakpadSymbolizerFactory(binary):
+  suffix = os.getenv('BREAKPAD_SUFFIX')
+  if suffix:
+    filename = binary + suffix
+    if os.access(filename, os.F_OK):
+      return BreakpadSymbolizer(filename)
+  return None
+
+
+def SystemSymbolizerFactory(system, addr, binary):
+  if system == 'Darwin':
+    return DarwinSymbolizer(addr, binary)
+  elif system == 'Linux':
+    return Addr2LineSymbolizer(binary)
+
+
+class BreakpadSymbolizer(Symbolizer):
+  def __init__(self, filename):
+    super(BreakpadSymbolizer, self).__init__()
+    self.filename = filename
+    lines = file(filename).readlines()
+    self.files = []
+    self.symbols = {}
+    self.address_list = []
+    self.addresses = {}
+    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
+    fragments = lines[0].rstrip().split()
+    self.arch = fragments[2]
+    self.debug_id = fragments[3]
+    self.binary = ' '.join(fragments[4:])
+    self.parse_lines(lines[1:])
+
+  def parse_lines(self, lines):
+    cur_function_addr = ''
+    for line in lines:
+      fragments = line.split()
+      if fragments[0] == 'FILE':
+        assert int(fragments[1]) == len(self.files)
+        self.files.append(' '.join(fragments[2:]))
+      elif fragments[0] == 'PUBLIC':
+        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
+      elif fragments[0] in ['CFI', 'STACK']:
+        pass
+      elif fragments[0] == 'FUNC':
+        cur_function_addr = int(fragments[1], 16)
+        if not cur_function_addr in self.symbols.keys():
+          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
+      else:
+        # Line starting with an address.
+        addr = int(fragments[0], 16)
+        self.address_list.append(addr)
+        # Tuple of symbol address, size, line, file number.
+        self.addresses[addr] = (cur_function_addr,
+                                int(fragments[1], 16),
+                                int(fragments[2]),
+                                int(fragments[3]))
+    self.address_list.sort()
+
+  def get_sym_file_line(self, addr):
+    key = None
+    if addr in self.addresses.keys():
+      key = addr
+    else:
+      index = bisect.bisect_left(self.address_list, addr)
+      if index == 0:
+        return None
+      else:
+        key = self.address_list[index - 1]
+    sym_id, size, line_no, file_no = self.addresses[key]
+    symbol = self.symbols[sym_id]
+    filename = self.files[file_no]
+    if addr < key + size:
+      return symbol, filename, line_no
+    else:
+      return None
+
+  def symbolize(self, addr, binary, offset):
+    if self.binary != binary:
+      return None
+    res = self.get_sym_file_line(int(offset, 16))
+    if res:
+      function_name, file_name, line_no = res
+      result = ['%s in %s %s:%d' % (
+          addr, function_name, file_name, line_no)]
+      print result
+      return result
+    else:
+      return None
+
+
+class SymbolizationLoop(object):
+  def __init__(self, binary_name_filter=None):
+    # Used by clients who may want to supply a different binary name.
+    # E.g. in Chrome several binaries may share a single .dSYM.
+    self.binary_name_filter = binary_name_filter
+    self.system = os.uname()[0]
+    if self.system in ['Linux', 'Darwin']:
+      self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
+    else:
+      raise Exception('Unknown system')
+
+  def symbolize_address(self, addr, binary, offset):
+    # Use the chain of symbolizers:
+    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
+    # (fall back to next symbolizer if the previous one fails).
+    if not binary in symbolizers:
+      symbolizers[binary] = ChainSymbolizer(
+          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
+    result = symbolizers[binary].symbolize(addr, binary, offset)
+    if result is None:
+      # Initialize system symbolizer only if other symbolizers failed.
+      symbolizers[binary].append_symbolizer(
+          SystemSymbolizerFactory(self.system, addr, binary))
+      result = symbolizers[binary].symbolize(addr, binary, offset)
+    # The system symbolizer must produce some result.
+    assert result
+    return result
+
+  def print_symbolized_lines(self, symbolized_lines):
+    if not symbolized_lines:
+      print self.current_line
+    else:
+      for symbolized_frame in symbolized_lines:
+        print '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()
+        self.frame_no += 1
+
+  def process_stdin(self):
+    self.frame_no = 0
+    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
+
+    while True:
+      line = sys.stdin.readline()
+      if not line: break
+      self.current_line = line.rstrip()
+      #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
+      stack_trace_line_format = (
+          '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
+      match = re.match(stack_trace_line_format, line)
+      if not match:
+        print self.current_line
+        continue
+      if DEBUG:
+        print line
+      _, frameno_str, addr, binary, offset = match.groups()
+      if frameno_str == '0':
+        # Assume that frame #0 is the first frame of new stack trace.
+        self.frame_no = 0
+      original_binary = binary
+      if self.binary_name_filter:
+        binary = self.binary_name_filter(binary)
+      symbolized_line = self.symbolize_address(addr, binary, offset)
+      if not symbolized_line:
+        if original_binary != binary:
+          symbolized_line = self.symbolize_address(addr, binary, offset)
+      self.print_symbolized_lines(symbolized_line)
+
+
+if __name__ == '__main__':
+  loop = SymbolizationLoop()
+  loop.process_stdin()

http://git-wip-us.apache.org/repos/asf/arrow/blob/23c4b08d/cpp/build-support/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/cpp/build-support/bootstrap_toolchain.py b/cpp/build-support/bootstrap_toolchain.py
new file mode 100755
index 0000000..128be78
--- /dev/null
+++ b/cpp/build-support/bootstrap_toolchain.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# Copyright (c) 2015, Cloudera, inc.
+# Confidential Cloudera Information: Covered by NDA.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Bootstrapping the native toolchain with prebuilt binaries
+#
+# The purpose of this script is to download prebuilt artifacts of the native toolchain to
+# satisfy the third-party dependencies. The script checks for the presence of
+# NATIVE_TOOLCHAIN. NATIVE_TOOLCHAIN indicates the location where the prebuilt artifacts
+# should be extracted to.
+#
+# The script is called as follows without any additional parameters:
+#
+#     python bootstrap_toolchain.py
+import sh
+import os
+import sys
+import re
+
+HOST = "https://native-toolchain.s3.amazonaws.com/build"
+
+OS_MAPPING = {
+  "centos6" : "ec2-package-centos-6",
+  "centos5" : "ec2-package-centos-5",
+  "centos7" : "ec2-package-centos-7",
+  "debian6" : "ec2-package-debian-6",
+  "debian7" : "ec2-package-debian-7",
+  "suselinux11": "ec2-package-sles-11",
+  "ubuntu12.04" : "ec2-package-ubuntu-12-04",
+  "ubuntu14.04" : "ec2-package-ubuntu-14-04"
+}
+
+def get_release_label():
+  """Gets the right package label from the OS version"""
+  release = "".join(map(lambda x: x.lower(), sh.lsb_release("-irs").split()))
+  for k, v in OS_MAPPING.iteritems():
+    if re.search(k, release):
+      return v
+
+  print("Pre-built toolchain archives not available for your platform.")
+  print("Clone and build native toolchain from source using this repository:")
+  print("    https://github.com/cloudera/native-toolchain")
+  raise Exception("Could not find package label for OS version: {0}.".format(release))
+
+def download_package(destination, product, version, compiler):
+  label = get_release_label()
+  file_name = "{0}-{1}-{2}-{3}.tar.gz".format(product, version, compiler, label)
+  url_path="/{0}/{1}-{2}/{0}-{1}-{2}-{3}.tar.gz".format(product, version, compiler, label)
+  download_path = HOST + url_path
+
+  print "URL {0}".format(download_path)
+  print "Downloading {0} to {1}".format(file_name, destination)
+  # --no-clobber skips the download if a file with the same name already exists
+  sh.wget(download_path, directory_prefix=destination, no_clobber=True)
+  print "Extracting {0}".format(file_name)
+  sh.tar(z=True, x=True, f=os.path.join(destination, file_name), directory=destination)
+  sh.rm(os.path.join(destination, file_name))
+
+
+def bootstrap(packages):
+  """Validates the presence of $NATIVE_TOOLCHAIN in the environment. By checking
+  $NATIVE_TOOLCHAIN is present, we assume that {LIB}_VERSION will be present as well. Will
+  create the directory specified by $NATIVE_TOOLCHAIN if it does not yet exist. Each of
+  the packages specified in `packages` is downloaded and extracted into $NATIVE_TOOLCHAIN.
+  """
+  # Create the destination directory if necessary
+  destination = os.getenv("NATIVE_TOOLCHAIN")
+  if not destination:
+    print("Build environment not set up correctly, make sure "
+          "$NATIVE_TOOLCHAIN is present.")
+    sys.exit(1)
+
+  if not os.path.exists(destination):
+    os.makedirs(destination)
+
+  # Detect the compiler
+  if "SYSTEM_GCC" in os.environ:
+    compiler = "gcc-system"
+  else:
+    compiler = "gcc-{0}".format(os.environ["GCC_VERSION"])
+
+  for p in packages:
+    pkg_name, pkg_version = unpack_name_and_version(p)
+    download_package(destination, pkg_name, pkg_version, compiler)
+
+def unpack_name_and_version(package):
+  """A package definition is either a string where the version is fetched from the
+  environment or a tuple where the package name and the package version are fully
+  specified.
+  """
+  if isinstance(package, basestring):
+    env_var = "{0}_VERSION".format(package).replace("-", "_").upper()
+    try:
+      return package, os.environ[env_var]
+    except KeyError:
+      raise Exception("Could not find version for {0} in environment var {1}".format(
+        package, env_var))
+  return package[0], package[1]
+
+if __name__ == "__main__":
+  packages = [("gcc","4.9.2"), ("gflags", "2.0"), ("glog", "0.3.3-p1"),
+              ("gperftools", "2.3"), ("libunwind", "1.1"), ("googletest", "20151222")]
+  bootstrap(packages)


Mime
View raw message