parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject [parquet-cpp] branch master updated: PARQUET-1196: Example parquet_arrow project
Date Thu, 15 Feb 2018 17:42:28 GMT
This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-cpp.git


The following commit(s) were added to refs/heads/master by this push:
     new 76388ea  PARQUET-1196: Example parquet_arrow project
76388ea is described below

commit 76388ea4eb8b23656283116bc656b0c8f5db093b
Author: Uwe L. Korn <uwelk@xhochy.com>
AuthorDate: Thu Feb 15 18:42:22 2018 +0100

    PARQUET-1196: Example parquet_arrow project
    
    Depends on https://github.com/apache/parquet-cpp/pull/434
    
    Author: Uwe L. Korn <uwelk@xhochy.com>
    Author: Korn, Uwe <Uwe.Korn@blue-yonder.com>
    
    Closes #436 from xhochy/PARQUET-1196 and squashes the following commits:
    
    a938da7 [Uwe L. Korn] Check Status for PrettyPrint
    15d62f3 [Uwe L. Korn] PARQUET-1196: Example parquet_arrow project
    1280fd5 [Korn, Uwe] PARQUET-1200: Support reading a single Arrow column from a Parquet file
---
 CMakeLists.txt                                     |  23 +++-
 ci/travis_script_toolchain.sh                      |  18 +++
 cmake_modules/ArrowExternalProject.cmake           |  74 +++++++++++
 cmake_modules/FindArrow.cmake                      |   4 +
 cmake_modules/ThirdpartyToolchain.cmake            |  62 +--------
 dev/release/run-rat.sh                             |   3 +
 examples/{ => low-level-api}/CMakeLists.txt        |   0
 examples/{ => low-level-api}/reader-writer.cc      |   0
 examples/parquet-arrow/CMakeLists.txt              |  78 +++++++++++
 examples/parquet-arrow/README.md                   |  20 +++
 .../cmake_modules/ArrowExternalProject.cmake       |   1 +
 .../parquet-arrow/cmake_modules/FindArrow.cmake    |   1 +
 .../parquet-arrow/cmake_modules/FindParquet.cmake  | 145 +++++++++++++++++++++
 examples/parquet-arrow/src/reader-writer.cc        | 134 +++++++++++++++++++
 14 files changed, 499 insertions(+), 64 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8fdf32..c2d4ef4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -431,7 +431,17 @@ endif (UNIX)
 ############################################################
 
 # runs clang format and updates files in place.
-add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py
+add_custom_target(format-example
+  COMMAND
+  ${BUILD_SUPPORT_DIR}/run_clang_format.py
+  ${CLANG_FORMAT_BIN}
+  ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
+  ${CMAKE_CURRENT_SOURCE_DIR}/examples/parquet-arrow)
+
+add_custom_target(format
+  DEPENDS format-example
+  COMMAND
+  ${BUILD_SUPPORT_DIR}/run_clang_format.py
   ${CLANG_FORMAT_BIN}
   ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/src)
@@ -439,7 +449,14 @@ add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py
 # runs clang format and exits with a non-zero exit code if any files need to be reformatted
 
 # TODO(wesm): Make this work in run_clang_format.py
-add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run_clang_format.py
+add_custom_target(check-format-examples ${BUILD_SUPPORT_DIR}/run_clang_format.py
+   ${CLANG_FORMAT_BIN}
+   ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
+   ${CMAKE_CURRENT_SOURCE_DIR}/examples/parquet-arrow 1)
+add_custom_target(check-format
+   DEPENDS check-format-examples
+   COMMAND
+   ${BUILD_SUPPORT_DIR}/run_clang_format.py
    ${CLANG_FORMAT_BIN}
    ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/src 1)
@@ -731,7 +748,7 @@ add_subdirectory(src/parquet/util)
 if (NOT MSVC)
   add_subdirectory(benchmarks)
 endif()
-add_subdirectory(examples)
+add_subdirectory(examples/low-level-api)
 add_subdirectory(tools)
 
 add_custom_target(clean-all
diff --git a/ci/travis_script_toolchain.sh b/ci/travis_script_toolchain.sh
index afa3eec..7db9658 100755
--- a/ci/travis_script_toolchain.sh
+++ b/ci/travis_script_toolchain.sh
@@ -54,11 +54,29 @@ export BOOST_ROOT=$CPP_TOOLCHAIN
 cmake -DPARQUET_CXXFLAGS=-Werror \
       -DPARQUET_TEST_MEMCHECK=ON \
       -DPARQUET_GENERATE_COVERAGE=1 \
+      -DCMAKE_INSTALL_PREFIX=$CPP_TOOLCHAIN \
       $TRAVIS_BUILD_DIR
 
 pushd $CPP_BUILD_DIR
 
 make -j4 || exit 1
+make install || exit 1
 ctest -VV -L unittest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; }
 
 popd
+
+# Build and run the parquet::arrow example. This also tests the usage of parquet-cpp as a library.
+
+pushd $TRAVIS_BUILD_DIR/examples/parquet-arrow
+mkdir build
+pushd build
+
+export ARROW_HOME=$CPP_TOOLCHAIN
+export PARQUET_HOME=$CPP_TOOLCHAIN
+
+cmake ..
+make VERBOSE=1
+./parquet-arrow-reader-writer
+
+popd
+popd
diff --git a/cmake_modules/ArrowExternalProject.cmake b/cmake_modules/ArrowExternalProject.cmake
new file mode 100644
index 0000000..4f23661
--- /dev/null
+++ b/cmake_modules/ArrowExternalProject.cmake
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(ARROW_PREFIX "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
+set(ARROW_LIB_DIR "${ARROW_PREFIX}")
+if (MSVC)
+  set(ARROW_SHARED_LIB "${ARROW_PREFIX}/bin/arrow.dll")
+  set(ARROW_SHARED_IMPLIB "${ARROW_LIB_DIR}/arrow.lib")
+  set(ARROW_STATIC_LIB "${ARROW_LIB_DIR}/arrow_static.lib")
+else()
+  set(ARROW_SHARED_LIB "${ARROW_LIB_DIR}/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  set(ARROW_STATIC_LIB "${ARROW_LIB_DIR}/libarrow.a")
+endif()
+
+set(ARROW_CMAKE_ARGS
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+  -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}
+  -DCMAKE_C_FLAGS=${EP_C_FLAGS}
+  -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}
+  -DCMAKE_INSTALL_LIBDIR=${ARROW_LIB_DIR}
+  -DARROW_JEMALLOC=OFF
+  -DARROW_IPC=OFF
+  -DARROW_WITH_LZ4=ON
+  -DARROW_WITH_ZSTD=ON
+  -DARROW_BUILD_SHARED=${PARQUET_BUILD_SHARED}
+  -DARROW_BOOST_USE_SHARED=${PARQUET_BOOST_USE_SHARED}
+  -DARROW_BUILD_TESTS=OFF)
+
+if (MSVC AND PARQUET_USE_STATIC_CRT)
+  set(ARROW_CMAKE_ARGS ${ARROW_CMAKE_ARGS} -DARROW_USE_STATIC_CRT=ON)
+endif()
+
+if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "")
+  set(ARROW_VERSION "501d60e918bd4d10c429ab34e0b8e8a87dffb732")
+else()
+  set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}")
+endif()
+message(STATUS "Building Apache Arrow from commit: ${ARROW_VERSION}")
+
+set(ARROW_URL "https://github.com/apache/arrow/archive/${ARROW_VERSION}.tar.gz")
+
+if (CMAKE_VERSION VERSION_GREATER "3.7")
+  set(ARROW_CONFIGURE SOURCE_SUBDIR "cpp" CMAKE_ARGS ${ARROW_CMAKE_ARGS})
+else()
+  set(ARROW_CONFIGURE CONFIGURE_COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}"
+    ${ARROW_CMAKE_ARGS} "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep/cpp")
+endif()
+
+ExternalProject_Add(arrow_ep
+  URL ${ARROW_URL}
+  ${ARROW_CONFIGURE}
+  BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_STATIC_LIB}")
+
+if (MSVC)
+  ExternalProject_Add_Step(arrow_ep copy_dll_step
+    DEPENDEES install
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${BUILD_OUTPUT_ROOT_DIRECTORY}
+    COMMAND ${CMAKE_COMMAND} -E copy ${ARROW_SHARED_LIB} ${BUILD_OUTPUT_ROOT_DIRECTORY})
+endif()
diff --git a/cmake_modules/FindArrow.cmake b/cmake_modules/FindArrow.cmake
index 6af9f88..b63b1af 100644
--- a/cmake_modules/FindArrow.cmake
+++ b/cmake_modules/FindArrow.cmake
@@ -22,6 +22,10 @@
 #  ARROW_SHARED_LIB, path to libarrow's shared library
 #  ARROW_FOUND, whether arrow has been found
 
+if (DEFINED ENV{ARROW_HOME})
+  set(ARROW_HOME "$ENV{ARROW_HOME}")
+endif()
+
 if ("${ARROW_HOME}" STREQUAL "")
   # PARQUET-955. If the user has set $ARROW_HOME in the environment, we respect
   # this, otherwise try to locate the pkgconfig in the system environment
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index 08b2a4c..09e30df 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -48,10 +48,6 @@ if (DEFINED ENV{THRIFT_HOME})
   set(THRIFT_HOME "$ENV{THRIFT_HOME}")
 endif()
 
-if (DEFINED ENV{ARROW_HOME})
-  set(ARROW_HOME "$ENV{ARROW_HOME}")
-endif()
-
 # ----------------------------------------------------------------------
 # Boost
 
@@ -355,63 +351,7 @@ endif()
 
 find_package(Arrow)
 if (NOT ARROW_FOUND)
-  set(ARROW_PREFIX "${BUILD_OUTPUT_ROOT_DIRECTORY}")
-  set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
-  set(ARROW_LIB_DIR "${ARROW_PREFIX}")
-  if (MSVC)
-    set(ARROW_SHARED_LIB "${ARROW_PREFIX}/bin/arrow.dll")
-    set(ARROW_SHARED_IMPLIB "${ARROW_LIB_DIR}/arrow.lib")
-    set(ARROW_STATIC_LIB "${ARROW_LIB_DIR}/arrow_static.lib")
-  else()
-    set(ARROW_SHARED_LIB "${ARROW_LIB_DIR}/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}")
-    set(ARROW_STATIC_LIB "${ARROW_LIB_DIR}/libarrow.a")
-  endif()
-
-  set(ARROW_CMAKE_ARGS
-    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-    -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}
-    -DCMAKE_C_FLAGS=${EP_C_FLAGS}
-    -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}
-    -DCMAKE_INSTALL_LIBDIR=${ARROW_LIB_DIR}
-    -DARROW_JEMALLOC=OFF
-    -DARROW_IPC=OFF
-    -DARROW_WITH_LZ4=ON
-    -DARROW_WITH_ZSTD=ON
-    -DARROW_BUILD_SHARED=${PARQUET_BUILD_SHARED}
-    -DARROW_BOOST_USE_SHARED=${PARQUET_BOOST_USE_SHARED}
-    -DARROW_BUILD_TESTS=OFF)
-
-  if (MSVC AND PARQUET_USE_STATIC_CRT)
-    set(ARROW_CMAKE_ARGS ${ARROW_CMAKE_ARGS} -DARROW_USE_STATIC_CRT=ON)
-  endif()
-
-  if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "")
-    set(ARROW_VERSION "501d60e918bd4d10c429ab34e0b8e8a87dffb732")
-  else()
-    set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}")
-  endif()
-  message(STATUS "Building Apache Arrow from commit: ${ARROW_VERSION}")
-
-  set(ARROW_URL "https://github.com/apache/arrow/archive/${ARROW_VERSION}.tar.gz")
-
-  if (CMAKE_VERSION VERSION_GREATER "3.7")
-    set(ARROW_CONFIGURE SOURCE_SUBDIR "cpp" CMAKE_ARGS ${ARROW_CMAKE_ARGS})
-  else()
-    set(ARROW_CONFIGURE CONFIGURE_COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}"
-      ${ARROW_CMAKE_ARGS} "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix/src/arrow_ep/cpp")
-  endif()
-
-  ExternalProject_Add(arrow_ep
-    URL ${ARROW_URL}
-    ${ARROW_CONFIGURE}
-    BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}" "${ARROW_STATIC_LIB}")
-
-  if (MSVC)
-    ExternalProject_Add_Step(arrow_ep copy_dll_step
-      DEPENDEES install
-      COMMAND ${CMAKE_COMMAND} -E make_directory ${BUILD_OUTPUT_ROOT_DIRECTORY}
-      COMMAND ${CMAKE_COMMAND} -E copy ${ARROW_SHARED_LIB} ${BUILD_OUTPUT_ROOT_DIRECTORY})
-  endif()
+  include(ArrowExternalProject)
   set(ARROW_VENDORED 1)
 else()
   set(ARROW_VENDORED 0)
diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh
index 4d3b752..2f0d4ff 100755
--- a/dev/release/run-rat.sh
+++ b/dev/release/run-rat.sh
@@ -24,6 +24,7 @@ curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/0.12/apache-rat
 RAT="java -jar apache-rat-0.12.jar -d "
 
 # generate the rat report
+# Ignore symlinks as RAT does not seem to understand them.
 $RAT $1 \
   -e ".*" \
   -e mman.h \
@@ -32,6 +33,8 @@ $RAT $1 \
   -e cpplint.py \
   -e pax_global_header \
   -e clang_format_exclusions.txt \
+  -e ArrowExternalProject.cmake \
+  -e FindArrow.cmake \
   > rat.txt
 cat rat.txt
 UNAPPROVED=`cat rat.txt  | grep "Unknown Licenses" | head -n 1 | cut -d " " -f 1`
diff --git a/examples/CMakeLists.txt b/examples/low-level-api/CMakeLists.txt
similarity index 100%
rename from examples/CMakeLists.txt
rename to examples/low-level-api/CMakeLists.txt
diff --git a/examples/reader-writer.cc b/examples/low-level-api/reader-writer.cc
similarity index 100%
rename from examples/reader-writer.cc
rename to examples/low-level-api/reader-writer.cc
diff --git a/examples/parquet-arrow/CMakeLists.txt b/examples/parquet-arrow/CMakeLists.txt
new file mode 100644
index 0000000..897fcfb
--- /dev/null
+++ b/examples/parquet-arrow/CMakeLists.txt
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Require cmake that supports BYPRODUCTS in add_custom_command, ExternalProject_Add [1].
+cmake_minimum_required(VERSION 3.2.0)
+
+project(parquet-arrow-example)
+
+include(ExternalProject)
+include(FindPkgConfig)
+include(GNUInstallDirs)
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
+
+# This ensures that things like gnu++11 get passed correctly
+set(CMAKE_CXX_STANDARD 11)
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# We want to link dynamically against Arrow and Parquet
+set(PARQUET_BUILD_SHARED ON)
+
+
+# First search the packages in the system. If they are not found, use CMake's
+# ExternalProject mechanism to build them locally.
+find_package(Arrow)
+if (NOT ARROW_FOUND)
+  # set compile output directory
+  if (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Debug)
+  endif(NOT CMAKE_BUILD_TYPE)
+  string (TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME)
+  # If build in-source, create the latest symlink. If build out-of-source, which is
+  # preferred, simply output the binaries in the build folder
+  if (${CMAKE_SOURCE_DIR} STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
+    set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}")
+    # Link build/latest to the current build directory, to avoid developers
+    # accidentally running the latest debug build when in fact they're building
+    # release builds.
+    FILE(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY})
+    if (NOT APPLE)
+      set(MORE_ARGS "-T")
+    endif()
+  EXECUTE_PROCESS(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
+    ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
+  else()
+    set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
+  endif()
+
+  include(ArrowExternalProject)
+  set(ARROW_VENDORED 1)
+else()
+  set(ARROW_VENDORED 0)
+endif()
+find_package(Parquet)
+
+include_directories(SYSTEM ${ARROW_INCLUDE_DIR} ${PARQUET_INCLUDE_DIR})
+
+add_executable(parquet-arrow-reader-writer src/reader-writer.cc)
+target_link_libraries(parquet-arrow-reader-writer ${PARQUET_SHARED_LIB} ${ARROW_SHARED_LIB})
+if (ARROW_VENDORED)
+  add_dependencies(parquet-arrow-reader-writer arrow_ep)
+endif()
diff --git a/examples/parquet-arrow/README.md b/examples/parquet-arrow/README.md
new file mode 100644
index 0000000..e99819f
--- /dev/null
+++ b/examples/parquet-arrow/README.md
@@ -0,0 +1,20 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+Using parquet-cpp with the arrow interface
+==========================================
+
+This folder contains an example project that shows how to setup a CMake project
+that consumes `parquet-cpp` as a library as well as how you can use the
+`parquet/arrow` interface to reading and write Apache Parquet files.
diff --git a/examples/parquet-arrow/cmake_modules/ArrowExternalProject.cmake b/examples/parquet-arrow/cmake_modules/ArrowExternalProject.cmake
new file mode 120000
index 0000000..b535f6e
--- /dev/null
+++ b/examples/parquet-arrow/cmake_modules/ArrowExternalProject.cmake
@@ -0,0 +1 @@
+../../../cmake_modules/ArrowExternalProject.cmake
\ No newline at end of file
diff --git a/examples/parquet-arrow/cmake_modules/FindArrow.cmake b/examples/parquet-arrow/cmake_modules/FindArrow.cmake
new file mode 120000
index 0000000..6c451ce
--- /dev/null
+++ b/examples/parquet-arrow/cmake_modules/FindArrow.cmake
@@ -0,0 +1 @@
+../../../cmake_modules/FindArrow.cmake
\ No newline at end of file
diff --git a/examples/parquet-arrow/cmake_modules/FindParquet.cmake b/examples/parquet-arrow/cmake_modules/FindParquet.cmake
new file mode 100644
index 0000000..8bbe05f
--- /dev/null
+++ b/examples/parquet-arrow/cmake_modules/FindParquet.cmake
@@ -0,0 +1,145 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# - Find PARQUET (parquet/parquet.h, libparquet.a, libparquet.so)
+# This module defines
+#  PARQUET_INCLUDE_DIR, directory containing headers
+#  PARQUET_LIBS, directory containing parquet libraries
+#  PARQUET_STATIC_LIB, path to libparquet.a
+#  PARQUET_SHARED_LIB, path to libparquet's shared library
+#  PARQUET_SHARED_IMP_LIB, path to libparquet's import library (MSVC only)
+#  PARQUET_FOUND, whether parquet has been found
+
+include(FindPkgConfig)
+
+if(NOT "$ENV{PARQUET_HOME}" STREQUAL "")
+    set(PARQUET_HOME "$ENV{PARQUET_HOME}")
+endif()
+
+if (MSVC)
+  SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll")
+
+  if (MSVC AND NOT PARQUET_MSVC_STATIC_LIB_SUFFIX)
+    set(PARQUET_MSVC_STATIC_LIB_SUFFIX "_static")
+  endif()
+
+  find_library(PARQUET_SHARED_LIBRARIES NAMES parquet
+    PATHS ${PARQUET_HOME} NO_DEFAULT_PATH
+    PATH_SUFFIXES "bin" )
+
+  get_filename_component(PARQUET_SHARED_LIBS ${PARQUET_SHARED_LIBRARIES} PATH )
+endif ()
+
+if(PARQUET_HOME)
+    set(PARQUET_SEARCH_HEADER_PATHS
+        ${PARQUET_HOME}/include
+        )
+    set(PARQUET_SEARCH_LIB_PATH
+        ${PARQUET_HOME}/lib
+        )
+    find_path(PARQUET_INCLUDE_DIR parquet/api/reader.h PATHS
+        ${PARQUET_SEARCH_HEADER_PATHS}
+        # make sure we don't accidentally pick up a different version
+        NO_DEFAULT_PATH
+        )
+    find_library(PARQUET_LIBRARIES NAMES parquet
+        PATHS ${PARQUET_HOME} NO_DEFAULT_PATH
+        PATH_SUFFIXES "lib")
+    get_filename_component(PARQUET_LIBS ${PARQUET_LIBRARIES} PATH )
+
+    # Try to autodiscover the Parquet ABI version
+    get_filename_component(PARQUET_LIB_REALPATH ${PARQUET_LIBRARIES} REALPATH)
+    get_filename_component(PARQUET_EXT_REALPATH ${PARQUET_LIB_REALPATH} EXT)
+    string(REGEX MATCH ".([0-9]+.[0-9]+.[0-9]+)" HAS_ABI_VERSION ${PARQUET_EXT_REALPATH})
+    if (HAS_ABI_VERSION)
+      if (APPLE)
+        string(REGEX REPLACE ".([0-9]+.[0-9]+.[0-9]+).dylib" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH})
+      else()
+        string(REGEX REPLACE ".so.([0-9]+.[0-9]+.[0-9]+)" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH})
+      endif()
+      string(REGEX REPLACE "([0-9]+).[0-9]+.[0-9]+" "\\1" PARQUET_SO_VERSION ${PARQUET_ABI_VERSION})
+    else()
+      set(PARQUET_ABI_VERSION "1.0.0")
+      set(PARQUET_SO_VERSION "1")
+    endif()
+else()
+    pkg_check_modules(PARQUET parquet)
+    if (PARQUET_FOUND)
+        pkg_get_variable(PARQUET_ABI_VERSION parquet abi_version)
+        message(STATUS "Parquet C++ ABI version: ${PARQUET_ABI_VERSION}")
+        pkg_get_variable(PARQUET_SO_VERSION parquet so_version)
+        message(STATUS "Parquet C++ SO version: ${PARQUET_SO_VERSION}")
+        set(PARQUET_INCLUDE_DIR ${PARQUET_INCLUDE_DIRS})
+        set(PARQUET_LIBS ${PARQUET_LIBRARY_DIRS})
+        set(PARQUET_SEARCH_LIB_PATH ${PARQUET_LIBRARY_DIRS})
+        message(STATUS "Searching for parquet libs in: ${PARQUET_SEARCH_LIB_PATH}")
+        find_library(PARQUET_LIBRARIES NAMES parquet
+            PATHS ${PARQUET_SEARCH_LIB_PATH} NO_DEFAULT_PATH)
+    else()
+        find_path(PARQUET_INCLUDE_DIR NAMES parquet/api/reader.h )
+        find_library(PARQUET_LIBRARIES NAMES parquet)
+        get_filename_component(PARQUET_LIBS ${PARQUET_LIBRARIES} PATH )
+    endif()
+endif()
+
+if (PARQUET_INCLUDE_DIR AND PARQUET_LIBRARIES)
+  set(PARQUET_FOUND TRUE)
+  set(PARQUET_LIB_NAME parquet)
+  if (MSVC)
+    set(PARQUET_STATIC_LIB "${PARQUET_LIBS}/${PARQUET_LIB_NAME}${PARQUET_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    set(PARQUET_SHARED_LIB "${PARQUET_SHARED_LIBS}/${PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    set(PARQUET_SHARED_IMP_LIB "${PARQUET_LIBS}/${PARQUET_LIB_NAME}.lib")
+  else()
+    set(PARQUET_STATIC_LIB ${PARQUET_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PARQUET_LIB_NAME}.a)
+    set(PARQUET_SHARED_LIB ${PARQUET_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+  endif()
+else ()
+  set(PARQUET_FOUND FALSE)
+endif ()
+
+if (PARQUET_FOUND)
+  if (NOT Parquet_FIND_QUIETLY)
+    message(STATUS "Found the Parquet library: ${PARQUET_LIBRARIES}")
+  endif ()
+else ()
+  if (NOT Parquet_FIND_QUIETLY)
+    if (NOT PARQUET_FOUND)
+      set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} Could not find the parquet library.")
+    endif()
+
+    set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} Looked in ")
+    if ( _parquet_roots )
+      set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} in ${_parquet_roots}.")
+    else ()
+      set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} system search paths.")
+    endif ()
+    if (Parquet_FIND_REQUIRED)
+      message(FATAL_ERROR "${PARQUET_ERR_MSG}")
+    else (Parquet_FIND_REQUIRED)
+      message(STATUS "${PARQUET_ERR_MSG}")
+    endif (Parquet_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  PARQUET_FOUND
+  PARQUET_INCLUDE_DIR
+  PARQUET_LIBS
+  PARQUET_LIBRARIES
+  PARQUET_STATIC_LIB
+  PARQUET_SHARED_LIB
+)
diff --git a/examples/parquet-arrow/src/reader-writer.cc b/examples/parquet-arrow/src/reader-writer.cc
new file mode 100644
index 0000000..f333cab
--- /dev/null
+++ b/examples/parquet-arrow/src/reader-writer.cc
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <parquet/arrow/reader.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/exception.h>
+
+// #0 Build dummy data to pass around
+// To have some input data, we first create an Arrow Table that holds
+// some data.
+std::shared_ptr<arrow::Table> generate_table() {
+  arrow::Int64Builder i64builder;
+  PARQUET_THROW_NOT_OK(i64builder.Append({1, 2, 3, 4, 5}));
+  std::shared_ptr<arrow::Array> i64array;
+  PARQUET_THROW_NOT_OK(i64builder.Finish(&i64array));
+
+  arrow::StringBuilder strbuilder;
+  PARQUET_THROW_NOT_OK(strbuilder.Append("some"));
+  PARQUET_THROW_NOT_OK(strbuilder.Append("string"));
+  PARQUET_THROW_NOT_OK(strbuilder.Append("content"));
+  PARQUET_THROW_NOT_OK(strbuilder.Append("in"));
+  PARQUET_THROW_NOT_OK(strbuilder.Append("rows"));
+  std::shared_ptr<arrow::Array> strarray;
+  PARQUET_THROW_NOT_OK(strbuilder.Finish(&strarray));
+
+  std::shared_ptr<arrow::Schema> schema = arrow::schema(
+      {arrow::field("int", arrow::int64()), arrow::field("str", arrow::utf8())});
+
+  return arrow::Table::Make(schema, {i64array, strarray});
+}
+
+// #1 Write out the data as a Parquet file
+void write_parquet_file(const arrow::Table& table) {
+  std::shared_ptr<arrow::io::FileOutputStream> outfile;
+  PARQUET_THROW_NOT_OK(
+      arrow::io::FileOutputStream::Open("parquet-arrow-example.parquet", &outfile));
+  // The last argument to the function call is the size of the RowGroup in
+  // the parquet file. Normally you would choose this to be rather large but
+  // for the example, we use a small value to have multiple RowGroups.
+  PARQUET_THROW_NOT_OK(
+      parquet::arrow::WriteTable(table, arrow::default_memory_pool(), outfile, 3));
+}
+
+// #2: Fully read in the file
+void read_whole_file() {
+  std::cout << "Reading parquet-arrow-example.parquet at once" << std::endl;
+  std::shared_ptr<arrow::io::ReadableFile> infile;
+  PARQUET_THROW_NOT_OK(arrow::io::ReadableFile::Open(
+      "parquet-arrow-example.parquet", arrow::default_memory_pool(), &infile));
+
+  std::unique_ptr<parquet::arrow::FileReader> reader;
+  PARQUET_THROW_NOT_OK(
+      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+  std::shared_ptr<arrow::Table> table;
+  PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
+  std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns()
+            << " columns." << std::endl;
+}
+
+// #3: Read only a single RowGroup of the parquet file
+void read_single_rowgroup() {
+  std::cout << "Reading first RowGroup of parquet-arrow-example.parquet" << std::endl;
+  std::shared_ptr<arrow::io::ReadableFile> infile;
+  PARQUET_THROW_NOT_OK(arrow::io::ReadableFile::Open(
+      "parquet-arrow-example.parquet", arrow::default_memory_pool(), &infile));
+
+  std::unique_ptr<parquet::arrow::FileReader> reader;
+  PARQUET_THROW_NOT_OK(
+      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+  std::shared_ptr<arrow::Table> table;
+  PARQUET_THROW_NOT_OK(reader->RowGroup(0)->ReadTable(&table));
+  std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns()
+            << " columns." << std::endl;
+}
+
+// #4: Read only a single column of the whole parquet file
+void read_single_column() {
+  std::cout << "Reading first column of parquet-arrow-example.parquet" << std::endl;
+  std::shared_ptr<arrow::io::ReadableFile> infile;
+  PARQUET_THROW_NOT_OK(arrow::io::ReadableFile::Open(
+      "parquet-arrow-example.parquet", arrow::default_memory_pool(), &infile));
+
+  std::unique_ptr<parquet::arrow::FileReader> reader;
+  PARQUET_THROW_NOT_OK(
+      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+  std::shared_ptr<arrow::Array> array;
+  PARQUET_THROW_NOT_OK(reader->ReadColumn(0, &array));
+  PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
+  std::cout << std::endl;
+}
+
+// #5: Read only a single column of a RowGroup (this is known as ColumnChunk)
+//     from the Parquet file.
+void read_single_column_chunk() {
+  std::cout << "Reading first ColumnChunk of the first RowGroup of "
+               "parquet-arrow-example.parquet"
+            << std::endl;
+  std::shared_ptr<arrow::io::ReadableFile> infile;
+  PARQUET_THROW_NOT_OK(arrow::io::ReadableFile::Open(
+      "parquet-arrow-example.parquet", arrow::default_memory_pool(), &infile));
+
+  std::unique_ptr<parquet::arrow::FileReader> reader;
+  PARQUET_THROW_NOT_OK(
+      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+  std::shared_ptr<arrow::Array> array;
+  PARQUET_THROW_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&array));
+  PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
+  std::cout << std::endl;
+}
+
+int main(int argc, char** argv) {
+  std::shared_ptr<arrow::Table> table = generate_table();
+  write_parquet_file(*table);
+  read_whole_file();
+  read_single_rowgroup();
+  read_single_column();
+  read_single_column_chunk();
+}

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.

Mime
View raw message