PARQUET-477: Add clang-format / clang-tidy checks to toolchain
I adapted @emkornfield's work from ARROW-71 (https://github.com/apache/arrow/commit/5d129991b3369b0e45cb79d1efe6ba2fd8dd21d0). It's a large diff due to the first reformatting of the codebase. Per https://github.com/travis-ci/apt-source-whitelist/issues/199 we can switch Travis back to Ubuntu 14.04 and maybe upgrade to LLVM 3.8 at some point in the future.
Author: Wes McKinney <wesm@apache.org>
Closes #92 from wesm/PARQUET-477 and squashes the following commits:
8b6e8f0 [Wes McKinney] Statically-link zlib
503e793 [Wes McKinney] Boost 1.55
2c512dc [Wes McKinney] Install newer boost due to C++11 issues on ubuntu precise
514601c [Wes McKinney] Fix build dir in travis script
6c2e7cf [Wes McKinney] Adapt clang-tidy / clang-format tools from Apache Arrow C++ codebase
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/674dbb39
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/674dbb39
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/674dbb39
Branch: refs/heads/master
Commit: 674dbb39c36e44d29ff3c7ce0c87c5abcc772dc7
Parents: dc0fc7d
Author: Wes McKinney <wesm@apache.org>
Authored: Sun May 1 08:56:46 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Sun May 1 08:56:46 2016 -0700
----------------------------------------------------------------------
.travis.yml | 26 +--
CMakeLists.txt | 47 ++++-
build-support/run-clang-format.sh | 42 ++++
build-support/run-clang-tidy.sh | 40 ++++
ci/travis_script_cpp.sh | 26 +++
cmake_modules/FindClangTools.cmake | 60 ++++++
src/.clang-format | 65 ++++++
src/.clang-tidy | 13 ++
src/.clang-tidy-ignore | 1 +
src/parquet/api/io.h | 2 +-
src/parquet/api/reader.h | 2 +-
src/parquet/api/schema.h | 2 +-
src/parquet/column/column-reader-test.cc | 54 +++--
src/parquet/column/column-writer-test.cc | 18 +-
src/parquet/column/levels-test.cc | 33 ++--
src/parquet/column/levels.h | 29 +--
src/parquet/column/page.h | 159 ++++++---------
src/parquet/column/reader.cc | 65 +++---
src/parquet/column/reader.h | 33 +---
src/parquet/column/scanner-test.cc | 100 +++++-----
src/parquet/column/scanner.cc | 6 +-
src/parquet/column/scanner.h | 69 +++----
src/parquet/column/test-util.h | 196 ++++++++-----------
src/parquet/column/writer.cc | 101 +++++-----
src/parquet/column/writer.h | 43 ++--
src/parquet/compression/codec-test.cc | 16 +-
src/parquet/compression/codec.cc | 2 +-
src/parquet/compression/codec.h | 19 +-
src/parquet/compression/gzip-codec.cc | 53 ++---
src/parquet/compression/lz4-codec.cc | 10 +-
src/parquet/compression/snappy-codec.cc | 8 +-
src/parquet/encodings/decoder.h | 4 +-
src/parquet/encodings/delta-bit-pack-encoding.h | 10 +-
.../encodings/delta-byte-array-encoding.h | 13 +-
.../delta-length-byte-array-encoding.h | 13 +-
src/parquet/encodings/dictionary-encoding.h | 76 +++----
src/parquet/encodings/encoder.h | 8 +-
src/parquet/encodings/encoding-test.cc | 40 ++--
src/parquet/encodings/plain-encoding.h | 54 +++--
src/parquet/exception.h | 4 +-
src/parquet/file/file-deserialize-test.cc | 64 +++---
src/parquet/file/file-serialize-test.cc | 21 +-
src/parquet/file/reader-internal.cc | 74 +++----
src/parquet/file/reader-internal.h | 24 +--
src/parquet/file/reader.cc | 48 ++---
src/parquet/file/reader.h | 26 +--
src/parquet/file/writer-internal.cc | 55 +++---
src/parquet/file/writer-internal.h | 38 ++--
src/parquet/file/writer.cc | 8 +-
src/parquet/file/writer.h | 20 +-
src/parquet/public-api-test.cc | 2 +-
src/parquet/reader-test.cc | 24 +--
src/parquet/schema/converter.cc | 13 +-
src/parquet/schema/converter.h | 21 +-
src/parquet/schema/descriptor.cc | 14 +-
src/parquet/schema/descriptor.h | 39 ++--
src/parquet/schema/printer.cc | 13 +-
src/parquet/schema/printer.h | 9 +-
src/parquet/schema/schema-converter-test.cc | 47 ++---
src/parquet/schema/schema-descriptor-test.cc | 22 +--
src/parquet/schema/schema-printer-test.cc | 8 +-
src/parquet/schema/schema-types-test.cc | 81 ++++----
src/parquet/schema/test-util.h | 6 +-
src/parquet/schema/types.cc | 86 ++++----
src/parquet/schema/types.h | 150 +++++---------
src/parquet/thrift/util.h | 4 +-
src/parquet/types-test.cc | 69 +++----
src/parquet/types.h | 40 ++--
src/parquet/util/bit-stream-utils.h | 29 ++-
src/parquet/util/bit-stream-utils.inline.h | 10 +-
src/parquet/util/bit-util-test.cc | 18 +-
src/parquet/util/bit-util.h | 88 ++++-----
src/parquet/util/buffer-builder.h | 13 +-
src/parquet/util/buffer-test.cc | 13 +-
src/parquet/util/buffer.cc | 20 +-
src/parquet/util/buffer.h | 59 ++----
src/parquet/util/compiler-util.h | 30 ++-
src/parquet/util/cpu-info.cc | 14 +-
src/parquet/util/cpu-info.h | 18 +-
src/parquet/util/hash-util.h | 33 ++--
src/parquet/util/input-output-test.cc | 14 +-
src/parquet/util/input.cc | 37 ++--
src/parquet/util/input.h | 21 +-
src/parquet/util/logging.h | 55 +++---
src/parquet/util/macros.h | 10 +-
src/parquet/util/mem-allocator-test.cc | 2 +-
src/parquet/util/mem-allocator.cc | 14 +-
src/parquet/util/mem-allocator.h | 14 +-
src/parquet/util/mem-pool-test.cc | 12 +-
src/parquet/util/mem-pool.cc | 36 ++--
src/parquet/util/mem-pool.h | 23 +--
src/parquet/util/output.cc | 11 +-
src/parquet/util/output.h | 4 +-
src/parquet/util/rle-encoding.h | 37 ++--
src/parquet/util/rle-test.cc | 39 ++--
src/parquet/util/sse-util.h | 119 +++++------
src/parquet/util/stopwatch.h | 15 +-
src/parquet/util/test-common.h | 44 ++---
src/parquet/util/test_main.cc | 2 +-
99 files changed, 1589 insertions(+), 1853 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index a540380..87ee43b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,19 +1,21 @@
sudo: required
-dist: trusty
+dist: precise
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- kalakris-cmake
+ - llvm-toolchain-precise-3.7
+ - boost-latest
packages:
+ - clang-format-3.7
+ - clang-tidy-3.7
- gcc-4.9
- g++-4.9
- gcov
- cmake
- valgrind
- - libboost-dev #needed for thrift cpp compilation
- - libboost-program-options-dev #needed for thrift cpp compilation
- - libboost-test-dev #needed for thrift cpp compilation
+ - libboost1.55-all-dev #needed for thrift cpp compilation
- libssl-dev #needed for thrift cpp compilation
- libtool #needed for thrift cpp compilation
- bison #needed for thrift cpp compilation
@@ -28,24 +30,11 @@ matrix:
- source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
- cmake -DCMAKE_CXX_FLAGS="-Werror" -DPARQUET_TEST_MEMCHECK=ON -DPARQUET_GENERATE_COVERAGE=1 $TRAVIS_BUILD_DIR
- export PARQUET_TEST_DATA=$TRAVIS_BUILD_DIR/data
- script:
- - make lint
- - make -j4 || exit 1
- - ctest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; }
- - sudo pip install cpp_coveralls
- - export PARQUET_ROOT=$TRAVIS_BUILD_DIR
- - $TRAVIS_BUILD_DIR/ci/upload_coverage.sh
- compiler: clang
os: linux
- script:
- - make -j4 || exit 1
- - ctest
- os: osx
compiler: clang
addons:
- script:
- - make -j4 || exit 1
- - ctest
language: cpp
before_install:
@@ -56,3 +45,6 @@ before_script:
- source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
- cmake -DCMAKE_CXX_FLAGS="-Werror" $TRAVIS_BUILD_DIR
- export PARQUET_TEST_DATA=$TRAVIS_BUILD_DIR/data
+
+script:
+- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7764cc0..56e9dea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,7 +30,21 @@ enable_testing()
# where to find cmake modules
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules")
-set(BUILD_SUPPORT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build-support)
+set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
+
+find_package(ClangTools)
+if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR CLANG_TIDY_FOUND)
+ # Generate a Clang compile_commands.json "compilation database" file for use
+ # with various development tools, such as Vim's YouCompleteMe plugin.
+ # See http://clang.llvm.org/docs/JSONCompilationDatabase.html
+ set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
+endif()
+
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+endif(CCACHE_FOUND)
if(APPLE)
set(CMAKE_MACOSX_RPATH 1)
@@ -196,7 +210,7 @@ set_target_properties(lz4static PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB})
find_package(ZLIB REQUIRED)
include_directories(SYSTEM ${ZLIB_INCLUDE_DIRS})
add_library(zlibstatic STATIC IMPORTED)
-set_target_properties(zlibstatic PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES})
+set_target_properties(zlibstatic PROPERTIES IMPORTED_LOCATION ${ZLIB_STATIC_LIB})
## GTest
find_package(GTest REQUIRED)
@@ -280,6 +294,35 @@ if (UNIX)
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`)
endif (UNIX)
+############################################################
+# "make format" and "make check-format" targets
+############################################################
+
+if (${CLANG_FORMAT_FOUND})
+ # runs clang format and updates files in place.
+ add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1
+ `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`)
+
+ # runs clang format and exits with a non-zero exit code if any files need to be reformatted
+ add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 0
+ `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`)
+endif()
+
+
+############################################################
+# "make clang-tidy" and "make check-clang-tidy" targets
+############################################################
+
+if (${CLANG_TIDY_FOUND})
+ # runs clang-tidy and attempts to fix any warning automatically
+ add_custom_target(clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh ${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json 1
+ `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc | sed -e '/_types/g' | sed -e '/_constants/g'`)
+ # runs clang-tidy and exits with a non-zero exit code if any errors are found.
+ add_custom_target(check-clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh ${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json
+ 0 `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc |grep -v -F -f ${CMAKE_CURRENT_SOURCE_DIR}/src/.clang-tidy-ignore`)
+
+endif()
+
#############################################################
# Test linking
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/build-support/run-clang-format.sh
----------------------------------------------------------------------
diff --git a/build-support/run-clang-format.sh b/build-support/run-clang-format.sh
new file mode 100755
index 0000000..01ddab2
--- /dev/null
+++ b/build-support/run-clang-format.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Runs clang format in the given directory
+# Arguments:
+# $1 - Path to the source tree
+# $2 - Path to the clang format binary
+# $3 - Apply fixes (if 0, raise an error instead when changes would be made)
+# $ARGN - Files to run clang format on
+#
+SOURCE_DIR=$1
+shift
+CLANG_FORMAT=$1
+shift
+APPLY_FIXES=$1
+shift
+
+# clang format will only find its configuration if we are in
+# the source tree or in a path relative to the source tree
+pushd $SOURCE_DIR
+if [ "$APPLY_FIXES" == "1" ]; then
+ $CLANG_FORMAT -i $@
+else
+
+ NUM_CORRECTIONS=`$CLANG_FORMAT -output-replacements-xml $@ | grep offset | wc -l`
+ if [ "$NUM_CORRECTIONS" -gt "0" ]; then
+ echo "clang-format suggested changes, please run 'make format'!!!!"
+ exit 1
+ fi
+fi
+popd
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/build-support/run-clang-tidy.sh
----------------------------------------------------------------------
diff --git a/build-support/run-clang-tidy.sh b/build-support/run-clang-tidy.sh
new file mode 100755
index 0000000..2a4b1c0
--- /dev/null
+++ b/build-support/run-clang-tidy.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# Runs clang tidy against the given compilation database
+# Arguments:
+# $1 - Path to the clang tidy binary
+# $2 - Path to the compile_commands.json to use
+# $3 - Apply fixes (if 0, raise an error instead when fixes would be suggested)
+# $ARGN - Files to run clang-tidy on
+#
+CLANG_TIDY=$1
+shift
+COMPILE_COMMANDS=$1
+shift
+APPLY_FIXES=$1
+shift
+
+# clang tidy will only find its configuration if we are in
+# the source tree or in a path relative to the source tree
+if [ "$APPLY_FIXES" == "1" ]; then
+ $CLANG_TIDY -p $COMPILE_COMMANDS -fix $@
+else
+ NUM_CORRECTIONS=`$CLANG_TIDY -p $COMPILE_COMMANDS $@ 2>&1 | grep -v Skipping | grep "warnings* generated" | wc -l`
+ if [ "$NUM_CORRECTIONS" -gt "0" ]; then
+ echo "clang-tidy had suggested fixes. Please fix these!!!"
+ exit 1
+ fi
+fi
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/ci/travis_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh
new file mode 100755
index 0000000..c8c0ac0
--- /dev/null
+++ b/ci/travis_script_cpp.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+set -e
+
+: ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/parquet-build}
+
+pushd $CPP_BUILD_DIR
+
+make lint
+if [ $TRAVIS_OS_NAME == "linux" ]; then
+ make check-format
+ make check-clang-tidy
+fi
+
+if [ $TRAVIS_OS_NAME == "linux" ]; then
+ make -j4 || exit 1
+ ctest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; }
+ sudo pip install cpp_coveralls
+ export PARQUET_ROOT=$TRAVIS_BUILD_DIR
+ $TRAVIS_BUILD_DIR/ci/upload_coverage.sh
+else
+ make -j4 || exit 1
+ ctest || { cat $TRAVIS_BUILD_DIR/parquet-build/Testing/Temporary/LastTest.log; exit 1; }
+fi
+
+popd
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/cmake_modules/FindClangTools.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindClangTools.cmake b/cmake_modules/FindClangTools.cmake
new file mode 100644
index 0000000..c07c7d2
--- /dev/null
+++ b/cmake_modules/FindClangTools.cmake
@@ -0,0 +1,60 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find the clang-tidy and clang-format modules
+#
+# Usage of this module as follows:
+#
+# find_package(ClangTools)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+# ClangToolsBin_HOME -
+# When set, this path is inspected instead of standard library binary locations
+# to find clang-tidy and clang-format
+#
+# This module defines
+# CLANG_TIDY_BIN, The path to the clang tidy binary
+# CLANG_TIDY_FOUND, Whether clang tidy was found
+# CLANG_FORMAT_BIN, The path to the clang format binary
+# CLANG_FORMAT_FOUND, Whether clang format was found
+
+find_program(CLANG_TIDY_BIN
+ NAMES clang-tidy-3.8 clang-tidy-3.7 clang-tidy-3.6 clang-tidy
+ PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
+ NO_DEFAULT_PATH
+)
+
+if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
+ set(CLANG_TIDY_FOUND 0)
+ message("clang-tidy not found")
+else()
+ set(CLANG_TIDY_FOUND 1)
+ message("clang-tidy found at ${CLANG_TIDY_BIN}")
+endif()
+
+find_program(CLANG_FORMAT_BIN
+ NAMES clang-format-3.8 clang-format-3.7 clang-format-3.6 clang-format
+ PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
+ NO_DEFAULT_PATH
+)
+
+if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
+ set(CLANG_FORMAT_FOUND 0)
+ message("clang-format not found")
+else()
+ set(CLANG_FORMAT_FOUND 1)
+ message("clang-format found at ${CLANG_FORMAT_BIN}")
+endif()
+
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/.clang-format
----------------------------------------------------------------------
diff --git a/src/.clang-format b/src/.clang-format
new file mode 100644
index 0000000..7d5b3cf
--- /dev/null
+++ b/src/.clang-format
@@ -0,0 +1,65 @@
+---
+Language: Cpp
+# BasedOnStyle: Google
+AccessModifierOffset: -1
+AlignAfterOpenBracket: false
+AlignConsecutiveAssignments: false
+AlignEscapedNewlinesLeft: true
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: true
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit: 90
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IndentCaseLabels: true
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1000
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 8
+UseTab: Never
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/.clang-tidy
----------------------------------------------------------------------
diff --git a/src/.clang-tidy b/src/.clang-tidy
new file mode 100644
index 0000000..6fc3742
--- /dev/null
+++ b/src/.clang-tidy
@@ -0,0 +1,13 @@
+---
+Checks: 'clang-diagnostic-*,clang-analyzer-*,-clang-analyzer-alpha*,google-.*,modernize-.*,readability-.*'
+HeaderFilterRegex: 'parquet/.*'
+AnalyzeTemporaryDtors: true
+CheckOptions:
+ - key: google-readability-braces-around-statements.ShortStatementLines
+ value: '1'
+ - key: google-readability-function-size.StatementThreshold
+ value: '800'
+ - key: google-readability-namespace-comments.ShortNamespaceLines
+ value: '10'
+ - key: google-readability-namespace-comments.SpacesBeforeComments
+ value: '2'
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/.clang-tidy-ignore
----------------------------------------------------------------------
diff --git a/src/.clang-tidy-ignore b/src/.clang-tidy-ignore
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/.clang-tidy-ignore
@@ -0,0 +1 @@
+
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/api/io.h
----------------------------------------------------------------------
diff --git a/src/parquet/api/io.h b/src/parquet/api/io.h
index 3a9b148..683dae2 100644
--- a/src/parquet/api/io.h
+++ b/src/parquet/api/io.h
@@ -24,4 +24,4 @@
#include "parquet/util/mem-allocator.h"
#include "parquet/util/output.h"
-#endif // PARQUET_API_IO_H
+#endif // PARQUET_API_IO_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/api/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/api/reader.h b/src/parquet/api/reader.h
index 41cb06b..572ecf5 100644
--- a/src/parquet/api/reader.h
+++ b/src/parquet/api/reader.h
@@ -29,4 +29,4 @@
// IO
#include "parquet/api/io.h"
-#endif // PARQUET_API_READER_H
+#endif // PARQUET_API_READER_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/api/schema.h
----------------------------------------------------------------------
diff --git a/src/parquet/api/schema.h b/src/parquet/api/schema.h
index aca6c99..523d046 100644
--- a/src/parquet/api/schema.h
+++ b/src/parquet/api/schema.h
@@ -23,4 +23,4 @@
#include "parquet/schema/printer.h"
#include "parquet/schema/types.h"
-#endif // PARQUET_API_SCHEMA_H
+#endif // PARQUET_API_SCHEMA_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/column-reader-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/column-reader-test.cc b/src/parquet/column/column-reader-test.cc
index 1db0613..524ec50 100644
--- a/src/parquet/column/column-reader-test.cc
+++ b/src/parquet/column/column-reader-test.cc
@@ -76,19 +76,15 @@ class TestPrimitiveReader : public ::testing::Test {
ASSERT_EQ(num_levels_, batch_actual);
ASSERT_EQ(num_values_, total_values_read);
ASSERT_TRUE(vector_equal(values_, vresult));
- if (max_def_level_ > 0) {
- ASSERT_TRUE(vector_equal(def_levels_, dresult));
- }
- if (max_rep_level_ > 0) {
- ASSERT_TRUE(vector_equal(rep_levels_, rresult));
- }
+ if (max_def_level_ > 0) { ASSERT_TRUE(vector_equal(def_levels_, dresult)); }
+ if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); }
// catch improper writes at EOS
batch_actual = reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read);
ASSERT_EQ(0, batch_actual);
ASSERT_EQ(0, values_read);
}
- void ExecutePlain(int num_pages, int levels_per_page, const ColumnDescriptor *d) {
+ void ExecutePlain(int num_pages, int levels_per_page, const ColumnDescriptor* d) {
num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
rep_levels_, values_, data_buffer_, pages_, Encoding::PLAIN);
num_levels_ = num_pages * levels_per_page;
@@ -101,7 +97,7 @@ class TestPrimitiveReader : public ::testing::Test {
reader_.reset();
}
- void ExecuteDict(int num_pages, int levels_per_page, const ColumnDescriptor *d) {
+ void ExecuteDict(int num_pages, int levels_per_page, const ColumnDescriptor* d) {
num_values_ = MakePages<Int32Type>(d, num_pages, levels_per_page, def_levels_,
rep_levels_, values_, data_buffer_, pages_, Encoding::RLE_DICTIONARY);
num_levels_ = num_pages * levels_per_page;
@@ -114,12 +110,12 @@ class TestPrimitiveReader : public ::testing::Test {
int num_values_;
int16_t max_def_level_;
int16_t max_rep_level_;
- vector<shared_ptr<Page> > pages_;
+ vector<shared_ptr<Page>> pages_;
std::shared_ptr<ColumnReader> reader_;
vector<int32_t> values_;
vector<int16_t> def_levels_;
vector<int16_t> rep_levels_;
- vector<uint8_t> data_buffer_; // For BA and FLBA
+ vector<uint8_t> data_buffer_; // For BA and FLBA
};
TEST_F(TestPrimitiveReader, TestInt32FlatRequired) {
@@ -162,10 +158,10 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
const ColumnDescriptor descr(type, max_def_level_, max_rep_level_);
shared_ptr<OwnedMutableBuffer> dummy = std::make_shared<OwnedMutableBuffer>();
- shared_ptr<DictionaryPage> dict_page = std::make_shared<DictionaryPage>(dummy,
- 0, Encoding::PLAIN);
- shared_ptr<DataPage> data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
- Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
+ shared_ptr<DictionaryPage> dict_page =
+ std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN);
+ shared_ptr<DataPage> data_page = MakeDataPage<Int32Type>(
+ &descr, {}, 0, Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
pages_.push_back(dict_page);
pages_.push_back(data_page);
InitReader(&descr);
@@ -173,10 +169,9 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
ASSERT_NO_THROW(reader_->HasNext());
pages_.clear();
- dict_page = std::make_shared<DictionaryPage>(dummy,
- 0, Encoding::PLAIN_DICTIONARY);
- data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
- Encoding::PLAIN_DICTIONARY, {}, 0, {}, 0, {}, 0);
+ dict_page = std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN_DICTIONARY);
+ data_page = MakeDataPage<Int32Type>(
+ &descr, {}, 0, Encoding::PLAIN_DICTIONARY, {}, 0, {}, 0, {}, 0);
pages_.push_back(dict_page);
pages_.push_back(data_page);
InitReader(&descr);
@@ -184,26 +179,25 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
ASSERT_NO_THROW(reader_->HasNext());
pages_.clear();
- data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
- Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
+ data_page = MakeDataPage<Int32Type>(
+ &descr, {}, 0, Encoding::RLE_DICTIONARY, {}, 0, {}, 0, {}, 0);
pages_.push_back(data_page);
InitReader(&descr);
// Tests dictionary page must occur before data page
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();
- dict_page = std::make_shared<DictionaryPage>(dummy,
- 0, Encoding::DELTA_BYTE_ARRAY);
+ dict_page = std::make_shared<DictionaryPage>(dummy, 0, Encoding::DELTA_BYTE_ARRAY);
pages_.push_back(dict_page);
InitReader(&descr);
// Tests only RLE_DICTIONARY is supported
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();
- shared_ptr<DictionaryPage> dict_page1 = std::make_shared<DictionaryPage>(dummy,
- 0, Encoding::PLAIN_DICTIONARY);
- shared_ptr<DictionaryPage> dict_page2 = std::make_shared<DictionaryPage>(dummy,
- 0, Encoding::PLAIN);
+ shared_ptr<DictionaryPage> dict_page1 =
+ std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN_DICTIONARY);
+ shared_ptr<DictionaryPage> dict_page2 =
+ std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN);
pages_.push_back(dict_page1);
pages_.push_back(dict_page2);
InitReader(&descr);
@@ -211,8 +205,8 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
ASSERT_THROW(reader_->HasNext(), ParquetException);
pages_.clear();
- data_page = MakeDataPage<Int32Type>(&descr, {}, 0,
- Encoding::DELTA_BYTE_ARRAY, {}, 0, {}, 0, {}, 0);
+ data_page = MakeDataPage<Int32Type>(
+ &descr, {}, 0, Encoding::DELTA_BYTE_ARRAY, {}, 0, {}, 0, {}, 0);
pages_.push_back(data_page);
InitReader(&descr);
// unsupported encoding
@@ -220,5 +214,5 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
pages_.clear();
}
-} // namespace test
-} // namespace parquet
+} // namespace test
+} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/column-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/column-writer-test.cc b/src/parquet/column/column-writer-test.cc
index df5aa9a..653572a 100644
--- a/src/parquet/column/column-writer-test.cc
+++ b/src/parquet/column/column-writer-test.cc
@@ -70,8 +70,8 @@ class TestPrimitiveWriter : public ::testing::Test {
sink_.reset(new InMemoryOutputStream());
std::unique_ptr<SerializedPageWriter> pager(
new SerializedPageWriter(sink_.get(), Compression::UNCOMPRESSED, &metadata_));
- return std::unique_ptr<Int64Writer>(new Int64Writer(schema_.get(), std::move(pager),
- output_size));
+ return std::unique_ptr<Int64Writer>(
+ new Int64Writer(schema_.get(), std::move(pager), output_size));
}
void ReadColumn() {
@@ -138,8 +138,8 @@ TEST_F(TestPrimitiveWriter, OptionalRepeated) {
std::vector<int16_t> repetition_levels(100, 0);
auto writer = BuildWriter();
- writer->WriteBatch(values.size(), definition_levels.data(),
- repetition_levels.data(), values.data());
+ writer->WriteBatch(
+ values.size(), definition_levels.data(), repetition_levels.data(), values.data());
writer->Close();
ReadColumn();
@@ -176,8 +176,8 @@ TEST_F(TestPrimitiveWriter, OptionalRepeatedTooFewRows) {
repetition_levels[3] = 1;
auto writer = BuildWriter();
- writer->WriteBatch(values.size(), definition_levels.data(),
- repetition_levels.data(), values.data());
+ writer->WriteBatch(
+ values.size(), definition_levels.data(), repetition_levels.data(), values.data());
ASSERT_THROW(writer->Close(), ParquetException);
}
@@ -196,7 +196,5 @@ TEST_F(TestPrimitiveWriter, RequiredNonRepeatedLargeChunk) {
ASSERT_EQ(values, values_out_);
}
-} // namespace test
-} // namespace parquet
-
-
+} // namespace test
+} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/levels-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels-test.cc b/src/parquet/column/levels-test.cc
index 87e596d..b15c0af 100644
--- a/src/parquet/column/levels-test.cc
+++ b/src/parquet/column/levels-test.cc
@@ -28,8 +28,8 @@ using std::string;
namespace parquet {
-void GenerateLevels(int min_repeat_factor, int max_repeat_factor,
- int max_level, std::vector<int16_t>& input_levels) {
+void GenerateLevels(int min_repeat_factor, int max_repeat_factor, int max_level,
+ std::vector<int16_t>& input_levels) {
// for each repetition count upto max_repeat_factor
for (int repeat = min_repeat_factor; repeat <= max_repeat_factor; repeat++) {
// repeat count increases by a factor of 2 for every iteration
@@ -56,14 +56,13 @@ void EncodeLevels(Encoding::type encoding, int max_level, int num_levels,
// encode levels
if (encoding == Encoding::RLE) {
// leave space to write the rle length value
- encoder.Init(encoding, max_level, num_levels,
- bytes.data() + sizeof(uint32_t), bytes.size());
+ encoder.Init(
+ encoding, max_level, num_levels, bytes.data() + sizeof(uint32_t), bytes.size());
levels_count = encoder.Encode(num_levels, input_levels);
(reinterpret_cast<uint32_t*>(bytes.data()))[0] = encoder.len();
} else {
- encoder.Init(encoding, max_level, num_levels,
- bytes.data(), bytes.size());
+ encoder.Init(encoding, max_level, num_levels, bytes.data(), bytes.size());
levels_count = encoder.Encode(num_levels, input_levels);
}
ASSERT_EQ(num_levels, levels_count);
@@ -94,7 +93,7 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level,
}
// check the remaining levels
int num_levels_completed = decode_count * (num_levels / decode_count);
- int num_remaining_levels = num_levels - num_levels_completed;
+ int num_remaining_levels = num_levels - num_levels_completed;
if (num_remaining_levels > 0) {
levels_count = decoder.Decode(num_remaining_levels, output_levels.data());
ASSERT_EQ(num_remaining_levels, levels_count);
@@ -102,7 +101,7 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level,
EXPECT_EQ(input_levels[i + num_levels_completed], output_levels[i]);
}
}
- //Test zero Decode values
+ // Test zero Decode values
ASSERT_EQ(0, decoder.Decode(1, output_levels.data()));
}
@@ -133,12 +132,11 @@ void VerifyDecodingMultipleSetData(Encoding::type encoding, int max_level,
// increase the repetition count for each iteration by a factor of 2
TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) {
int min_repeat_factor = 0;
- int max_repeat_factor = 7; // 128
+ int max_repeat_factor = 7; // 128
int max_bit_width = 8;
std::vector<int16_t> input_levels;
std::vector<uint8_t> bytes;
- Encoding::type encodings[2] = {Encoding::RLE,
- Encoding::BIT_PACKED};
+ Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED};
// for each encoding
for (int encode = 0; encode < 2; encode++) {
@@ -150,8 +148,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) {
// find the maximum level for the current bit_width
int max_level = (1 << bit_width) - 1;
// Generate levels
- GenerateLevels(min_repeat_factor, max_repeat_factor,
- max_level, input_levels);
+ GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels);
EncodeLevels(encoding, max_level, input_levels.size(), input_levels.data(), bytes);
VerifyDecodingLevels(encoding, max_level, input_levels, bytes);
input_levels.clear();
@@ -162,15 +159,13 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) {
// Test multiple decoder SetData calls
TEST(TestLevels, TestLevelsDecodeMultipleSetData) {
int min_repeat_factor = 3;
- int max_repeat_factor = 7; // 128
+ int max_repeat_factor = 7; // 128
int bit_width = 8;
int max_level = (1 << bit_width) - 1;
std::vector<int16_t> input_levels;
std::vector<std::vector<uint8_t>> bytes;
- Encoding::type encodings[2] = {Encoding::RLE,
- Encoding::BIT_PACKED};
- GenerateLevels(min_repeat_factor, max_repeat_factor,
- max_level, input_levels);
+ Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED};
+ GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels);
int num_levels = input_levels.size();
int setdata_factor = 8;
int split_level_size = num_levels / setdata_factor;
@@ -188,4 +183,4 @@ TEST(TestLevels, TestLevelsDecodeMultipleSetData) {
}
}
-} // namespace parquet
+} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/levels.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels.h b/src/parquet/column/levels.h
index f57708d..fd84ec9 100644
--- a/src/parquet/column/levels.h
+++ b/src/parquet/column/levels.h
@@ -32,8 +32,8 @@ class LevelEncoder {
LevelEncoder() {}
// Initialize the LevelEncoder.
- void Init(Encoding::type encoding, int16_t max_level,
- int num_buffered_values, uint8_t* data, int data_size) {
+ void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
+ uint8_t* data, int data_size) {
bit_width_ = BitUtil::Log2(max_level + 1);
encoding_ = encoding;
switch (encoding) {
@@ -60,18 +60,14 @@ class LevelEncoder {
if (encoding_ == Encoding::RLE) {
for (int i = 0; i < batch_size; ++i) {
- if (!rle_encoder_->Put(*(levels + i))) {
- break;
- }
+ if (!rle_encoder_->Put(*(levels + i))) { break; }
++num_encoded;
}
rle_encoder_->Flush();
rle_length_ = rle_encoder_->len();
} else {
for (int i = 0; i < batch_size; ++i) {
- if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) {
- break;
- }
+ if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
++num_encoded;
}
bit_packed_encoder_->Flush();
@@ -94,15 +90,14 @@ class LevelEncoder {
std::unique_ptr<BitWriter> bit_packed_encoder_;
};
-
class LevelDecoder {
public:
LevelDecoder() : num_values_remaining_(0) {}
// Initialize the LevelDecoder state with new data
// and return the number of bytes consumed
- int SetData(Encoding::type encoding, int16_t max_level,
- int num_buffered_values, const uint8_t* data) {
+ int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
+ const uint8_t* data) {
uint32_t num_bytes = 0;
encoding_ = encoding;
num_values_remaining_ = num_buffered_values;
@@ -140,16 +135,12 @@ class LevelDecoder {
int num_values = std::min(num_values_remaining_, batch_size);
if (encoding_ == Encoding::RLE) {
for (int i = 0; i < num_values; ++i) {
- if (!rle_decoder_->Get(levels + i)) {
- break;
- }
+ if (!rle_decoder_->Get(levels + i)) { break; }
++num_decoded;
}
} else {
for (int i = 0; i < num_values; ++i) {
- if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) {
- break;
- }
+ if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
++num_decoded;
}
}
@@ -165,5 +156,5 @@ class LevelDecoder {
std::unique_ptr<BitReader> bit_packed_decoder_;
};
-} // namespace parquet
-#endif // PARQUET_COLUMN_LEVELS_H
+} // namespace parquet
+#endif // PARQUET_COLUMN_LEVELS_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/page.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/page.h b/src/parquet/column/page.h
index 709f7c8..8bad57f 100644
--- a/src/parquet/column/page.h
+++ b/src/parquet/column/page.h
@@ -40,67 +40,46 @@ namespace parquet {
// here, both on the read and write path
class Page {
public:
- Page(const std::shared_ptr<Buffer>& buffer, PageType::type type) :
- buffer_(buffer),
- type_(type) {}
+ Page(const std::shared_ptr<Buffer>& buffer, PageType::type type)
+ : buffer_(buffer), type_(type) {}
- PageType::type type() const {
- return type_;
- }
+ PageType::type type() const { return type_; }
// @returns: a pointer to the page's data
- const uint8_t* data() const {
- return buffer_->data();
- }
+ const uint8_t* data() const { return buffer_->data(); }
// @returns: the total size in bytes of the page's data buffer
- int32_t size() const {
- return buffer_->size();
- }
+ int32_t size() const { return buffer_->size(); }
private:
std::shared_ptr<Buffer> buffer_;
PageType::type type_;
};
-
class DataPage : public Page {
public:
- DataPage(const std::shared_ptr<Buffer>& buffer,
- int32_t num_values, Encoding::type encoding,
- Encoding::type definition_level_encoding,
- Encoding::type repetition_level_encoding) :
- Page(buffer, PageType::DATA_PAGE),
- num_values_(num_values),
- encoding_(encoding),
- definition_level_encoding_(definition_level_encoding),
- repetition_level_encoding_(repetition_level_encoding) {}
+ DataPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
+ Encoding::type encoding, Encoding::type definition_level_encoding,
+ Encoding::type repetition_level_encoding)
+ : Page(buffer, PageType::DATA_PAGE),
+ num_values_(num_values),
+ encoding_(encoding),
+ definition_level_encoding_(definition_level_encoding),
+ repetition_level_encoding_(repetition_level_encoding) {}
- int32_t num_values() const {
- return num_values_;
- }
+ int32_t num_values() const { return num_values_; }
- Encoding::type encoding() const {
- return encoding_;
- }
+ Encoding::type encoding() const { return encoding_; }
- Encoding::type repetition_level_encoding() const {
- return repetition_level_encoding_;
- }
+ Encoding::type repetition_level_encoding() const { return repetition_level_encoding_; }
- Encoding::type definition_level_encoding() const {
- return definition_level_encoding_;
- }
+ Encoding::type definition_level_encoding() const { return definition_level_encoding_; }
// DataPageHeader::statistics::max field, if it was set
- const uint8_t* max() const {
- return reinterpret_cast<const uint8_t*>(max_.c_str());
- }
+ const uint8_t* max() const { return reinterpret_cast<const uint8_t*>(max_.c_str()); }
// DataPageHeader::statistics::min field, if it was set
- const uint8_t* min() const {
- return reinterpret_cast<const uint8_t*>(min_.c_str());
- }
+ const uint8_t* min() const { return reinterpret_cast<const uint8_t*>(min_.c_str()); }
private:
int32_t num_values_;
@@ -114,50 +93,33 @@ class DataPage : public Page {
std::string min_;
};
-
class DataPageV2 : public Page {
public:
- DataPageV2(const std::shared_ptr<Buffer>& buffer,
- int32_t num_values, int32_t num_nulls, int32_t num_rows,
- Encoding::type encoding,
- int32_t definition_levels_byte_length,
- int32_t repetition_levels_byte_length, bool is_compressed = false) :
- Page(buffer, PageType::DATA_PAGE_V2),
- num_values_(num_values),
- num_nulls_(num_nulls),
- num_rows_(num_rows),
- encoding_(encoding),
- definition_levels_byte_length_(definition_levels_byte_length),
- repetition_levels_byte_length_(repetition_levels_byte_length),
- is_compressed_(is_compressed) {}
-
- int32_t num_values() const {
- return num_values_;
- }
-
- int32_t num_nulls() const {
- return num_nulls_;
- }
-
- int32_t num_rows() const {
- return num_rows_;
- }
-
- Encoding::type encoding() const {
- return encoding_;
- }
-
- int32_t definition_levels_byte_length() const {
- return definition_levels_byte_length_;
- }
-
- int32_t repetition_levels_byte_length() const {
- return repetition_levels_byte_length_;
- }
-
- bool is_compressed() const {
- return is_compressed_;
- }
+ DataPageV2(const std::shared_ptr<Buffer>& buffer, int32_t num_values, int32_t num_nulls,
+ int32_t num_rows, Encoding::type encoding, int32_t definition_levels_byte_length,
+ int32_t repetition_levels_byte_length, bool is_compressed = false)
+ : Page(buffer, PageType::DATA_PAGE_V2),
+ num_values_(num_values),
+ num_nulls_(num_nulls),
+ num_rows_(num_rows),
+ encoding_(encoding),
+ definition_levels_byte_length_(definition_levels_byte_length),
+ repetition_levels_byte_length_(repetition_levels_byte_length),
+ is_compressed_(is_compressed) {}
+
+ int32_t num_values() const { return num_values_; }
+
+ int32_t num_nulls() const { return num_nulls_; }
+
+ int32_t num_rows() const { return num_rows_; }
+
+ Encoding::type encoding() const { return encoding_; }
+
+ int32_t definition_levels_byte_length() const { return definition_levels_byte_length_; }
+
+ int32_t repetition_levels_byte_length() const { return repetition_levels_byte_length_; }
+
+ bool is_compressed() const { return is_compressed_; }
private:
int32_t num_values_;
@@ -171,27 +133,20 @@ class DataPageV2 : public Page {
// TODO(wesm): format::DataPageHeaderV2.statistics
};
-
class DictionaryPage : public Page {
public:
- DictionaryPage(const std::shared_ptr<Buffer>& buffer,
- int32_t num_values, Encoding::type encoding, bool is_sorted = false) :
- Page(buffer, PageType::DICTIONARY_PAGE),
- num_values_(num_values),
- encoding_(encoding),
- is_sorted_(is_sorted) {}
+ DictionaryPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
+ Encoding::type encoding, bool is_sorted = false)
+ : Page(buffer, PageType::DICTIONARY_PAGE),
+ num_values_(num_values),
+ encoding_(encoding),
+ is_sorted_(is_sorted) {}
- int32_t num_values() const {
- return num_values_;
- }
+ int32_t num_values() const { return num_values_; }
- Encoding::type encoding() const {
- return encoding_;
- }
+ Encoding::type encoding() const { return encoding_; }
- bool is_sorted() const {
- return is_sorted_;
- }
+ bool is_sorted() const { return is_sorted_; }
private:
int32_t num_values_;
@@ -220,10 +175,10 @@ class PageWriter {
const std::shared_ptr<Buffer>& definition_levels,
Encoding::type definition_level_encoding,
const std::shared_ptr<Buffer>& repetition_levels,
- Encoding::type repetition_level_encoding,
- const std::shared_ptr<Buffer>& values, Encoding::type encoding) = 0;
+ Encoding::type repetition_level_encoding, const std::shared_ptr<Buffer>& values,
+ Encoding::type encoding) = 0;
};
-} // namespace parquet
+} // namespace parquet
-#endif // PARQUET_COLUMN_PAGE_H
+#endif // PARQUET_COLUMN_PAGE_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc
index 4598dfb..a13dfd3 100644
--- a/src/parquet/column/reader.cc
+++ b/src/parquet/column/reader.cc
@@ -30,11 +30,11 @@ namespace parquet {
ColumnReader::ColumnReader(const ColumnDescriptor* descr,
std::unique_ptr<PageReader> pager, MemoryAllocator* allocator)
- : descr_(descr),
- pager_(std::move(pager)),
- num_buffered_values_(0),
- num_decoded_values_(0),
- allocator_(allocator) {}
+ : descr_(descr),
+ pager_(std::move(pager)),
+ num_buffered_values_(0),
+ num_decoded_values_(0),
+ allocator_(allocator) {}
template <typename DType>
void TypedColumnReader<DType>::ConfigureDictionary(const DictionaryPage* page) {
@@ -60,7 +60,7 @@ void TypedColumnReader<DType>::ConfigureDictionary(const DictionaryPage* page) {
// TODO(wesm): investigate whether this all-or-nothing decoding of the
// dictionary makes sense and whether performance can be improved
- auto decoder = std::make_shared<DictionaryDecoder<DType> >(descr_, allocator_);
+ auto decoder = std::make_shared<DictionaryDecoder<DType>>(descr_, allocator_);
decoder->SetDict(&dictionary);
decoders_[encoding] = decoder;
} else {
@@ -73,8 +73,7 @@ void TypedColumnReader<DType>::ConfigureDictionary(const DictionaryPage* page) {
// PLAIN_DICTIONARY is deprecated but used to be used as a dictionary index
// encoding.
static bool IsDictionaryIndexEncoding(const Encoding::type& e) {
- return e == Encoding::RLE_DICTIONARY ||
- e == Encoding::PLAIN_DICTIONARY;
+ return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY;
}
template <typename DType>
@@ -108,24 +107,24 @@ bool TypedColumnReader<DType>::ReadNewPage() {
// the page size to determine the number of bytes in the encoded data.
int64_t data_size = page->size();
- //Data page Layout: Repetition Levels - Definition Levels - encoded values.
- //Levels are encoded as rle or bit-packed.
- //Init repetition levels
+ // Data page Layout: Repetition Levels - Definition Levels - encoded values.
+ // Levels are encoded as rle or bit-packed.
+ // Init repetition levels
if (descr_->max_repetition_level() > 0) {
- int64_t rep_levels_bytes = repetition_level_decoder_.SetData(
- page->repetition_level_encoding(), descr_->max_repetition_level(),
- num_buffered_values_, buffer);
+ int64_t rep_levels_bytes =
+ repetition_level_decoder_.SetData(page->repetition_level_encoding(),
+ descr_->max_repetition_level(), num_buffered_values_, buffer);
buffer += rep_levels_bytes;
data_size -= rep_levels_bytes;
}
- //TODO figure a way to set max_definition_level_ to 0
- //if the initial value is invalid
+ // TODO figure a way to set max_definition_level_ to 0
+ // if the initial value is invalid
- //Init definition levels
+ // Init definition levels
if (descr_->max_definition_level() > 0) {
- int64_t def_levels_bytes = definition_level_decoder_.SetData(
- page->definition_level_encoding(), descr_->max_definition_level(),
- num_buffered_values_, buffer);
+ int64_t def_levels_bytes =
+ definition_level_decoder_.SetData(page->definition_level_encoding(),
+ descr_->max_definition_level(), num_buffered_values_, buffer);
buffer += def_levels_bytes;
data_size -= def_levels_bytes;
}
@@ -134,14 +133,12 @@ bool TypedColumnReader<DType>::ReadNewPage() {
// first page with this encoding.
Encoding::type encoding = page->encoding();
- if (IsDictionaryIndexEncoding(encoding)) {
- encoding = Encoding::RLE_DICTIONARY;
- }
+ if (IsDictionaryIndexEncoding(encoding)) { encoding = Encoding::RLE_DICTIONARY; }
auto it = decoders_.find(static_cast<int>(encoding));
if (it != decoders_.end()) {
if (encoding == Encoding::RLE_DICTIONARY) {
- DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY);
+ DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY);
}
current_decoder_ = it->second.get();
} else {
@@ -179,26 +176,20 @@ bool TypedColumnReader<DType>::ReadNewPage() {
// Batch read APIs
int64_t ColumnReader::ReadDefinitionLevels(int64_t batch_size, int16_t* levels) {
- if (descr_->max_definition_level() == 0) {
- return 0;
- }
+ if (descr_->max_definition_level() == 0) { return 0; }
return definition_level_decoder_.Decode(batch_size, levels);
}
int64_t ColumnReader::ReadRepetitionLevels(int64_t batch_size, int16_t* levels) {
- if (descr_->max_repetition_level() == 0) {
- return 0;
- }
+ if (descr_->max_repetition_level() == 0) { return 0; }
return repetition_level_decoder_.Decode(batch_size, levels);
}
// ----------------------------------------------------------------------
// Dynamic column reader constructor
-std::shared_ptr<ColumnReader> ColumnReader::Make(
- const ColumnDescriptor* descr,
- std::unique_ptr<PageReader> pager,
- MemoryAllocator* allocator) {
+std::shared_ptr<ColumnReader> ColumnReader::Make(const ColumnDescriptor* descr,
+ std::unique_ptr<PageReader> pager, MemoryAllocator* allocator) {
switch (descr->physical_type()) {
case Type::BOOLEAN:
return std::make_shared<BoolReader>(descr, std::move(pager), allocator);
@@ -215,8 +206,8 @@ std::shared_ptr<ColumnReader> ColumnReader::Make(
case Type::BYTE_ARRAY:
return std::make_shared<ByteArrayReader>(descr, std::move(pager), allocator);
case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<FixedLenByteArrayReader>(descr,
- std::move(pager), allocator);
+ return std::make_shared<FixedLenByteArrayReader>(
+ descr, std::move(pager), allocator);
default:
ParquetException::NYI("type reader not implemented");
}
@@ -236,4 +227,4 @@ template class TypedColumnReader<DoubleType>;
template class TypedColumnReader<ByteArrayType>;
template class TypedColumnReader<FLBAType>;
-} // namespace parquet
+} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h
index 7704c52..926a2fd 100644
--- a/src/parquet/column/reader.h
+++ b/src/parquet/column/reader.h
@@ -47,20 +47,14 @@ class ColumnReader {
// Either there is no data page available yet, or the data page has been
// exhausted
if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) {
- if (!ReadNewPage() || num_buffered_values_ == 0) {
- return false;
- }
+ if (!ReadNewPage() || num_buffered_values_ == 0) { return false; }
}
return true;
}
- Type::type type() const {
- return descr_->physical_type();
- }
+ Type::type type() const { return descr_->physical_type(); }
- const ColumnDescriptor* descr() const {
- return descr_;
- }
+ const ColumnDescriptor* descr() const { return descr_; }
protected:
virtual bool ReadNewPage() = 0;
@@ -107,12 +101,9 @@ class TypedColumnReader : public ColumnReader {
public:
typedef typename DType::c_type T;
- TypedColumnReader(const ColumnDescriptor* schema,
- std::unique_ptr<PageReader> pager,
- MemoryAllocator* allocator = default_allocator()) :
- ColumnReader(schema, std::move(pager), allocator),
- current_decoder_(NULL) {
- }
+ TypedColumnReader(const ColumnDescriptor* schema, std::unique_ptr<PageReader> pager,
+ MemoryAllocator* allocator = default_allocator())
+ : ColumnReader(schema, std::move(pager), allocator), current_decoder_(NULL) {}
// Read a batch of repetition levels, definition levels, and values from the
// column.
@@ -145,14 +136,13 @@ class TypedColumnReader : public ColumnReader {
// Map of encoding type to the respective decoder object. For example, a
// column chunk's data pages may include both dictionary-encoded and
// plain-encoded data.
- std::unordered_map<int, std::shared_ptr<DecoderType> > decoders_;
+ std::unordered_map<int, std::shared_ptr<DecoderType>> decoders_;
void ConfigureDictionary(const DictionaryPage* page);
DecoderType* current_decoder_;
};
-
template <typename DType>
inline int64_t TypedColumnReader<DType>::ReadValues(int64_t batch_size, T* out) {
int64_t num_decoded = current_decoder_->Decode(out, batch_size);
@@ -183,9 +173,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_
// TODO(wesm): this tallying of values-to-decode can be performed with better
// cache-efficiency if fused with the level decoding.
for (int64_t i = 0; i < num_def_levels; ++i) {
- if (def_levels[i] == descr_->max_definition_level()) {
- ++values_to_read;
- }
+ if (def_levels[i] == descr_->max_definition_level()) { ++values_to_read; }
}
} else {
// Required field, read all values
@@ -207,7 +195,6 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_
return total_values;
}
-
typedef TypedColumnReader<BooleanType> BoolReader;
typedef TypedColumnReader<Int32Type> Int32Reader;
typedef TypedColumnReader<Int64Type> Int64Reader;
@@ -217,6 +204,6 @@ typedef TypedColumnReader<DoubleType> DoubleReader;
typedef TypedColumnReader<ByteArrayType> ByteArrayReader;
typedef TypedColumnReader<FLBAType> FixedLenByteArrayReader;
-} // namespace parquet
+} // namespace parquet
-#endif // PARQUET_COLUMN_READER_H
+#endif // PARQUET_COLUMN_READER_H
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/scanner-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/scanner-test.cc b/src/parquet/column/scanner-test.cc
index 78bc3c9..1e3ce74 100644
--- a/src/parquet/column/scanner-test.cc
+++ b/src/parquet/column/scanner-test.cc
@@ -48,59 +48,52 @@ bool operator==(const FixedLenByteArray& a, const FixedLenByteArray& b) {
namespace test {
-template<>
-void InitValues<bool>(int num_values, vector<bool>& values,
- vector<uint8_t>& buffer) {
+template <>
+void InitValues<bool>(int num_values, vector<bool>& values, vector<uint8_t>& buffer) {
values = flip_coins(num_values, 0);
}
-template<>
-void InitValues<Int96>(int num_values, vector<Int96>& values,
- vector<uint8_t>& buffer) {
+template <>
+void InitValues<Int96>(int num_values, vector<Int96>& values, vector<uint8_t>& buffer) {
random_Int96_numbers(num_values, 0, std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::max(), values.data());
}
-template<>
-void InitValues<ByteArray>(int num_values, vector<ByteArray>& values,
- vector<uint8_t>& buffer) {
+template <>
+void InitValues<ByteArray>(
+ int num_values, vector<ByteArray>& values, vector<uint8_t>& buffer) {
int max_byte_array_len = 12;
int num_bytes = max_byte_array_len + sizeof(uint32_t);
size_t nbytes = num_values * num_bytes;
buffer.resize(nbytes);
- random_byte_array(num_values, 0, buffer.data(), values.data(),
- max_byte_array_len);
+ random_byte_array(num_values, 0, buffer.data(), values.data(), max_byte_array_len);
}
-template<>
-void InitValues<FLBA>(int num_values, vector<FLBA>& values,
- vector<uint8_t>& buffer) {
+template <>
+void InitValues<FLBA>(int num_values, vector<FLBA>& values, vector<uint8_t>& buffer) {
size_t nbytes = num_values * FLBA_LENGTH;
buffer.resize(nbytes);
- random_fixed_byte_array(num_values, 0, buffer.data(), FLBA_LENGTH,
- values.data());
+ random_fixed_byte_array(num_values, 0, buffer.data(), FLBA_LENGTH, values.data());
}
-template<>
-void InitDictValues<bool>(int num_values, int dict_per_page,
- vector<bool>& values, vector<uint8_t>& buffer) {
+template <>
+void InitDictValues<bool>(
+ int num_values, int dict_per_page, vector<bool>& values, vector<uint8_t>& buffer) {
// No op for bool
}
-
template <typename Type>
class TestFlatScanner : public ::testing::Test {
public:
typedef typename Type::c_type T;
- void InitScanner(const ColumnDescriptor *d) {
+ void InitScanner(const ColumnDescriptor* d) {
std::unique_ptr<PageReader> pager(new test::MockPageReader(pages_));
scanner_ = Scanner::Make(ColumnReader::Make(d, std::move(pager)));
}
- void CheckResults(int batch_size, const ColumnDescriptor *d) {
- TypedScanner<Type>* scanner =
- reinterpret_cast<TypedScanner<Type>* >(scanner_.get());
+ void CheckResults(int batch_size, const ColumnDescriptor* d) {
+ TypedScanner<Type>* scanner = reinterpret_cast<TypedScanner<Type>*>(scanner_.get());
T val;
bool is_null = false;
int16_t def_level;
@@ -110,14 +103,14 @@ class TestFlatScanner : public ::testing::Test {
for (int i = 0; i < num_levels_; i++) {
ASSERT_TRUE(scanner->Next(&val, &def_level, &rep_level, &is_null)) << i << j;
if (!is_null) {
- ASSERT_EQ(values_[j], val) << i <<"V"<< j;
+ ASSERT_EQ(values_[j], val) << i << "V" << j;
j++;
}
if (d->max_definition_level() > 0) {
- ASSERT_EQ(def_levels_[i], def_level) << i <<"D"<< j;
+ ASSERT_EQ(def_levels_[i], def_level) << i << "D" << j;
}
if (d->max_repetition_level() > 0) {
- ASSERT_EQ(rep_levels_[i], rep_level) << i <<"R"<< j;
+ ASSERT_EQ(rep_levels_[i], rep_level) << i << "R" << j;
}
}
ASSERT_EQ(num_values_, j);
@@ -132,7 +125,7 @@ class TestFlatScanner : public ::testing::Test {
}
void Execute(int num_pages, int levels_per_page, int batch_size,
- const ColumnDescriptor *d, Encoding::type encoding) {
+ const ColumnDescriptor* d, Encoding::type encoding) {
num_values_ = MakePages<Type>(d, num_pages, levels_per_page, def_levels_, rep_levels_,
values_, data_buffer_, pages_, encoding);
num_levels_ = num_pages * levels_per_page;
@@ -145,14 +138,14 @@ class TestFlatScanner : public ::testing::Test {
std::shared_ptr<ColumnDescriptor>& d2, std::shared_ptr<ColumnDescriptor>& d3,
int length) {
NodePtr type;
- type = schema::PrimitiveNode::Make("c1", Repetition::REQUIRED, Type::type_num,
- LogicalType::NONE, length);
+ type = schema::PrimitiveNode::Make(
+ "c1", Repetition::REQUIRED, Type::type_num, LogicalType::NONE, length);
d1.reset(new ColumnDescriptor(type, 0, 0));
- type = schema::PrimitiveNode::Make("c2", Repetition::OPTIONAL, Type::type_num,
- LogicalType::NONE, length);
+ type = schema::PrimitiveNode::Make(
+ "c2", Repetition::OPTIONAL, Type::type_num, LogicalType::NONE, length);
d2.reset(new ColumnDescriptor(type, 4, 0));
- type = schema::PrimitiveNode::Make("c3", Repetition::REPEATED, Type::type_num,
- LogicalType::NONE, length);
+ type = schema::PrimitiveNode::Make(
+ "c3", Repetition::REPEATED, Type::type_num, LogicalType::NONE, length);
d3.reset(new ColumnDescriptor(type, 4, 2));
}
@@ -173,12 +166,12 @@ class TestFlatScanner : public ::testing::Test {
protected:
int num_levels_;
int num_values_;
- vector<shared_ptr<Page> > pages_;
+ vector<shared_ptr<Page>> pages_;
std::shared_ptr<Scanner> scanner_;
vector<T> values_;
vector<int16_t> def_levels_;
vector<int16_t> rep_levels_;
- vector<uint8_t> data_buffer_; // For BA and FLBA
+ vector<uint8_t> data_buffer_; // For BA and FLBA
};
typedef TestFlatScanner<FLBAType> TestFlatFLBAScanner;
@@ -187,8 +180,8 @@ static int num_levels_per_page = 100;
static int num_pages = 20;
static int batch_size = 32;
-typedef ::testing::Types<Int32Type, Int64Type, Int96Type,
- FloatType, DoubleType, ByteArrayType> TestTypes;
+typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType,
+ ByteArrayType> TestTypes;
typedef TestFlatScanner<BooleanType> TestBooleanFlatScanner;
typedef TestFlatScanner<FLBAType> TestFLBAFlatScanner;
@@ -200,8 +193,8 @@ TYPED_TEST(TestFlatScanner, TestPlainScanner) {
}
TYPED_TEST(TestFlatScanner, TestDictScanner) {
- this->ExecuteAll(num_pages, num_levels_per_page, batch_size, 0,
- Encoding::RLE_DICTIONARY);
+ this->ExecuteAll(
+ num_pages, num_levels_per_page, batch_size, 0, Encoding::RLE_DICTIONARY);
}
TEST_F(TestBooleanFlatScanner, TestPlainScanner) {
@@ -213,8 +206,8 @@ TEST_F(TestFLBAFlatScanner, TestPlainScanner) {
}
TEST_F(TestFLBAFlatScanner, TestDictScanner) {
- this->ExecuteAll(num_pages, num_levels_per_page, batch_size, FLBA_LENGTH,
- Encoding::RLE_DICTIONARY);
+ this->ExecuteAll(
+ num_pages, num_levels_per_page, batch_size, FLBA_LENGTH, Encoding::RLE_DICTIONARY);
}
TEST_F(TestFLBAFlatScanner, TestPlainDictScanner) {
@@ -222,14 +215,13 @@ TEST_F(TestFLBAFlatScanner, TestPlainDictScanner) {
Encoding::PLAIN_DICTIONARY);
}
-
-//PARQUET 502
+// PARQUET 502
TEST_F(TestFlatFLBAScanner, TestSmallBatch) {
NodePtr type = schema::PrimitiveNode::Make("c1", Repetition::REQUIRED,
Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 0, 0);
- num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
- data_buffer_, pages_);
+ num_values_ = MakePages<FLBAType>(
+ &d, 1, 100, def_levels_, rep_levels_, values_, data_buffer_, pages_);
num_levels_ = 1 * 100;
InitScanner(&d);
CheckResults(1, &d);
@@ -239,12 +231,12 @@ TEST_F(TestFlatFLBAScanner, TestDescriptorAPI) {
NodePtr type = schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 4, 0);
- num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
- data_buffer_, pages_);
+ num_values_ = MakePages<FLBAType>(
+ &d, 1, 100, def_levels_, rep_levels_, values_, data_buffer_, pages_);
num_levels_ = 1 * 100;
InitScanner(&d);
TypedScanner<FLBAType>* scanner =
- reinterpret_cast<TypedScanner<FLBAType>* >(scanner_.get());
+ reinterpret_cast<TypedScanner<FLBAType>*>(scanner_.get());
ASSERT_EQ(10, scanner->descr()->type_precision());
ASSERT_EQ(2, scanner->descr()->type_scale());
ASSERT_EQ(FLBA_LENGTH, scanner->descr()->type_length());
@@ -254,12 +246,12 @@ TEST_F(TestFlatFLBAScanner, TestFLBAPrinterNext) {
NodePtr type = schema::PrimitiveNode::Make("c1", Repetition::OPTIONAL,
Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, FLBA_LENGTH, 10, 2);
const ColumnDescriptor d(type, 4, 0);
- num_values_ = MakePages<FLBAType>(&d, 1, 100, def_levels_, rep_levels_, values_,
- data_buffer_, pages_);
+ num_values_ = MakePages<FLBAType>(
+ &d, 1, 100, def_levels_, rep_levels_, values_, data_buffer_, pages_);
num_levels_ = 1 * 100;
InitScanner(&d);
TypedScanner<FLBAType>* scanner =
- reinterpret_cast<TypedScanner<FLBAType>* >(scanner_.get());
+ reinterpret_cast<TypedScanner<FLBAType>*>(scanner_.get());
scanner->SetBatchSize(batch_size);
std::stringstream ss_fail;
for (int i = 0; i < num_levels_; i++) {
@@ -271,5 +263,5 @@ TEST_F(TestFlatFLBAScanner, TestFLBAPrinterNext) {
ASSERT_THROW(scanner->PrintNext(ss_fail, 17), ParquetException);
}
-} // namespace test
-} // namespace parquet
+} // namespace test
+} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/scanner.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/scanner.cc b/src/parquet/column/scanner.cc
index 5397751..8db3d2b 100644
--- a/src/parquet/column/scanner.cc
+++ b/src/parquet/column/scanner.cc
@@ -42,8 +42,8 @@ std::shared_ptr<Scanner> Scanner::Make(std::shared_ptr<ColumnReader> col_reader,
case Type::BYTE_ARRAY:
return std::make_shared<ByteArrayScanner>(col_reader, batch_size, allocator);
case Type::FIXED_LEN_BYTE_ARRAY:
- return std::make_shared<FixedLenByteArrayScanner>(col_reader,
- batch_size, allocator);
+ return std::make_shared<FixedLenByteArrayScanner>(
+ col_reader, batch_size, allocator);
default:
ParquetException::NYI("type reader not implemented");
}
@@ -51,4 +51,4 @@ std::shared_ptr<Scanner> Scanner::Make(std::shared_ptr<ColumnReader> col_reader,
return std::shared_ptr<Scanner>(nullptr);
}
-} // namespace parquet
+} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/674dbb39/src/parquet/column/scanner.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/scanner.h b/src/parquet/column/scanner.h
index d52838e..f27c2d3 100644
--- a/src/parquet/column/scanner.h
+++ b/src/parquet/column/scanner.h
@@ -39,14 +39,14 @@ class Scanner {
public:
explicit Scanner(std::shared_ptr<ColumnReader> reader,
int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
- MemoryAllocator* allocator = default_allocator()) :
- batch_size_(batch_size),
- level_offset_(0),
- levels_buffered_(0),
- value_buffer_(0, allocator),
- value_offset_(0),
- values_buffered_(0),
- reader_(reader) {
+ MemoryAllocator* allocator = default_allocator())
+ : batch_size_(batch_size),
+ level_offset_(0),
+ levels_buffered_(0),
+ value_buffer_(0, allocator),
+ value_offset_(0),
+ values_buffered_(0),
+ reader_(reader) {
// TODO: don't allocate for required fields
def_levels_.resize(descr()->max_definition_level() > 0 ? batch_size_ : 0);
rep_levels_.resize(descr()->max_repetition_level() > 0 ? batch_size_ : 0);
@@ -60,19 +60,13 @@ class Scanner {
virtual void PrintNext(std::ostream& out, int width) = 0;
- bool HasNext() {
- return level_offset_ < levels_buffered_ || reader_->HasNext();
- }
+ bool HasNext() { return level_offset_ < levels_buffered_ || reader_->HasNext(); }
- const ColumnDescriptor* descr() const {
- return reader_->descr();
- }
+ const ColumnDescriptor* descr() const { return reader_->descr(); }
- int64_t batch_size() const { return batch_size_;}
+ int64_t batch_size() const { return batch_size_; }
- void SetBatchSize(int64_t batch_size) {
- batch_size_ = batch_size;
- }
+ void SetBatchSize(int64_t batch_size) { batch_size_ = batch_size; }
protected:
int64_t batch_size_;
@@ -90,7 +84,6 @@ class Scanner {
std::shared_ptr<ColumnReader> reader_;
};
-
template <typename DType>
class TypedScanner : public Scanner {
public:
@@ -98,8 +91,8 @@ class TypedScanner : public Scanner {
explicit TypedScanner(std::shared_ptr<ColumnReader> reader,
int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
- MemoryAllocator* allocator = default_allocator()) :
- Scanner(reader, batch_size, allocator) {
+ MemoryAllocator* allocator = default_allocator())
+ : Scanner(reader, batch_size, allocator) {
typed_reader_ = static_cast<TypedColumnReader<DType>*>(reader.get());
int value_byte_size = type_traits<DType::type_num>::value_byte_size;
value_buffer_.Resize(batch_size_ * value_byte_size);
@@ -110,14 +103,12 @@ class TypedScanner : public Scanner {
bool NextLevels(int16_t* def_level, int16_t* rep_level) {
if (level_offset_ == levels_buffered_) {
- levels_buffered_ = typed_reader_->ReadBatch(batch_size_, &def_levels_[0],
- &rep_levels_[0], values_, &values_buffered_);
+ levels_buffered_ = typed_reader_->ReadBatch(
+ batch_size_, &def_levels_[0], &rep_levels_[0], values_, &values_buffered_);
value_offset_ = 0;
level_offset_ = 0;
- if (!levels_buffered_) {
- return false;
- }
+ if (!levels_buffered_) { return false; }
}
*def_level = descr()->max_definition_level() > 0 ? def_levels_[level_offset_] : 0;
*rep_level = descr()->max_repetition_level() > 0 ? rep_levels_[level_offset_] : 0;
@@ -126,7 +117,7 @@ class TypedScanner : public Scanner {
}
bool Next(T* val, int16_t* def_level, int16_t* rep_level, bool* is_null) {
- if (level_offset_ == levels_buffered_) {
+ if (level_offset_ == levels_buffered_) {
if (!HasNext()) {
// Out of data pages
return false;
@@ -136,9 +127,7 @@ class TypedScanner : public Scanner {
NextLevels(def_level, rep_level);
*is_null = *def_level < descr()->max_definition_level();
- if (*is_null) {
- return true;
- }
+ if (*is_null) { return true; }
if (value_offset_ == values_buffered_) {
throw ParquetException("Value was non-null, but has not been buffered");
@@ -162,9 +151,7 @@ class TypedScanner : public Scanner {
NextLevels(&def_level, &rep_level);
*is_null = def_level < descr()->max_definition_level();
- if (*is_null) {
- return true;
- }
+ if (*is_null) { return true; }
if (value_offset_ == values_buffered_) {
throw ParquetException("Value was non-null, but has not been buffered");
@@ -178,9 +165,7 @@ class TypedScanner : public Scanner {
bool is_null = false;
char buffer[25];
- if (!NextValue(&val, &is_null)) {
- throw ParquetException("No more values buffered");
- }
+ if (!NextValue(&val, &is_null)) { throw ParquetException("No more values buffered"); }
if (is_null) {
std::string null_fmt = format_fwf<ByteArrayType>(width);
@@ -200,10 +185,9 @@ class TypedScanner : public Scanner {
T* values_;
};
-
template <typename DType>
-inline void TypedScanner<DType>::FormatValue(void* val, char* buffer,
- int bufsize, int width) {
+inline void TypedScanner<DType>::FormatValue(
+ void* val, char* buffer, int bufsize, int width) {
std::string fmt = format_fwf<DType>(width);
snprintf(buffer, bufsize, fmt.c_str(), *reinterpret_cast<T*>(val));
}
@@ -229,8 +213,7 @@ inline void TypedScanner<FLBAType>::FormatValue(
void* val, char* buffer, int bufsize, int width) {
std::string fmt = format_fwf<FLBAType>(width);
std::string result = FixedLenByteArrayToString(
- *reinterpret_cast<FixedLenByteArray*>(val),
- descr()->type_length());
+ *reinterpret_cast<FixedLenByteArray*>(val), descr()->type_length());
snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
}
@@ -243,6 +226,6 @@ typedef TypedScanner<DoubleType> DoubleScanner;
typedef TypedScanner<ByteArrayType> ByteArrayScanner;
typedef TypedScanner<FLBAType> FixedLenByteArrayScanner;
-} // namespace parquet
+} // namespace parquet
-#endif // PARQUET_COLUMN_SCANNER_H
+#endif // PARQUET_COLUMN_SCANNER_H
|