parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject parquet-cpp git commit: PARQUET-614: Remove unneeded LZ4-related code
Date Fri, 13 May 2016 17:26:26 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 4c7b5f2e4 -> 06d4280e7


PARQUET-614: Remove unneeded LZ4-related code

The LZ4 algorithm is not used in the Parquet format. We will need to make dynamic calls to
liblzo in a form that does not cause GPL complications when we do implement the LZO compression
type.

Author: Wes McKinney <wesm@apache.org>

Closes #103 from wesm/PARQUET-614 and squashes the following commits:

dd35249 [Wes McKinney] Remove unneeded LZ4-related code


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/06d4280e
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/06d4280e
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/06d4280e

Branch: refs/heads/master
Commit: 06d4280e79bf801231a447c2357fd0f4adea56e9
Parents: 4c7b5f2
Author: Wes McKinney <wesm@apache.org>
Authored: Fri May 13 10:26:06 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Fri May 13 10:26:06 2016 -0700

----------------------------------------------------------------------
 CMakeLists.txt                         |  6 --
 cmake_modules/FindLz4.cmake            | 92 -----------------------------
 example/decode_benchmark.cc            |  6 --
 src/parquet/compression/CMakeLists.txt |  2 -
 src/parquet/compression/codec-test.cc  |  4 --
 src/parquet/compression/codec.h        | 14 -----
 src/parquet/compression/lz4-codec.cc   | 43 --------------
 thirdparty/build_thirdparty.sh         |  8 ---
 thirdparty/download_thirdparty.sh      |  5 --
 thirdparty/set_thirdparty_env.sh       |  1 -
 thirdparty/versions.sh                 |  5 --
 11 files changed, 186 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a16b41b..181828a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -262,12 +262,6 @@ include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR})
 add_library(snappystatic STATIC IMPORTED)
 set_target_properties(snappystatic PROPERTIES IMPORTED_LOCATION ${SNAPPY_STATIC_LIB})
 
-## LZ4
-find_package(Lz4 REQUIRED)
-include_directories(SYSTEM ${LZ4_INCLUDE_DIR})
-add_library(lz4static STATIC IMPORTED)
-set_target_properties(lz4static PROPERTIES IMPORTED_LOCATION ${LZ4_STATIC_LIB})
-
 ## ZLIB
 find_package(ZLIB REQUIRED)
 include_directories(SYSTEM ${ZLIB_INCLUDE_DIRS})

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/cmake_modules/FindLz4.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindLz4.cmake b/cmake_modules/FindLz4.cmake
deleted file mode 100644
index 0184f2b..0000000
--- a/cmake_modules/FindLz4.cmake
+++ /dev/null
@@ -1,92 +0,0 @@
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Tries to find Lz4 headers and libraries.
-#
-# Usage of this module as follows:
-#
-#  find_package(Lz4)
-#
-# Variables used by this module, they can change the default behaviour and need
-# to be set before calling find_package:
-#
-#  Lz4_HOME - When set, this path is inspected instead of standard library
-#             locations as the root of the Lz4 installation.
-#             The environment variable LZ4_HOME overrides this veriable.
-#
-# - Find LZ4 (lz4.h, liblz4.a, liblz4.so, and liblz4.so.1)
-# This module defines
-#  LZ4_INCLUDE_DIR, directory containing headers
-#  LZ4_LIBS, directory containing lz4 libraries
-#  LZ4_STATIC_LIB, path to liblz4.a
-#  LZ4_SHARED_LIB, path to liblz4's shared library
-#  LZ4_FOUND, whether lz4 has been found
-
-if( NOT "$ENV{LZ4_HOME}" STREQUAL "")
-    file( TO_CMAKE_PATH "$ENV{LZ4_HOME}" _native_path )
-    list( APPEND _lz4_roots ${_native_path} )
-elseif ( Lz4_HOME )
-    list( APPEND _lz4_roots ${Lz4_HOME} )
-endif()
-
-# Try the parameterized roots, if they exist
-if ( _lz4_roots )
-    find_path( LZ4_INCLUDE_DIR NAMES lz4.h
-        PATHS ${_lz4_roots} NO_DEFAULT_PATH
-        PATH_SUFFIXES "include" )
-    find_library( LZ4_LIBRARIES NAMES lz4
-        PATHS ${_lz4_roots} NO_DEFAULT_PATH
-        PATH_SUFFIXES "lib" )
-else ()
-    find_path( LZ4_INCLUDE_DIR NAMES lz4.h )
-    find_library( LZ4_LIBRARIES NAMES lz4 )
-endif ()
-
-
-if (LZ4_INCLUDE_DIR AND LZ4_LIBRARIES)
-  set(LZ4_FOUND TRUE)
-  get_filename_component( LZ4_LIBS ${LZ4_LIBRARIES} PATH )
-  set(LZ4_LIB_NAME liblz4)
-  set(LZ4_STATIC_LIB ${LZ4_LIBS}/${LZ4_LIB_NAME}.a)
-  set(LZ4_SHARED_LIB ${LZ4_LIBS}/${LZ4_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
-else ()
-  set(LZ4_FOUND FALSE)
-endif ()
-
-if (LZ4_FOUND)
-  if (NOT Lz4_FIND_QUIETLY)
-    message(STATUS "Found the Lz4 library: ${LZ4_LIBRARIES}")
-  endif ()
-else ()
-  if (NOT Lz4_FIND_QUIETLY)
-    set(LZ4_ERR_MSG "Could not find the Lz4 library. Looked in ")
-    if ( _lz4_roots )
-      set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${_lz4_roots}.")
-    else ()
-      set(LZ4_ERR_MSG "${LZ4_ERR_MSG} system search paths.")
-    endif ()
-    if (Lz4_FIND_REQUIRED)
-      message(FATAL_ERROR "${LZ4_ERR_MSG}")
-    else (Lz4_FIND_REQUIRED)
-      message(STATUS "${LZ4_ERR_MSG}")
-    endif (Lz4_FIND_REQUIRED)
-  endif ()
-endif ()
-
-mark_as_advanced(
-  LZ4_INCLUDE_DIR
-  LZ4_LIBS
-  LZ4_LIBRARIES
-  LZ4_STATIC_LIB
-  LZ4_SHARED_LIB
-)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/example/decode_benchmark.cc
----------------------------------------------------------------------
diff --git a/example/decode_benchmark.cc b/example/decode_benchmark.cc
index 8a913d3..4eb1975 100644
--- a/example/decode_benchmark.cc
+++ b/example/decode_benchmark.cc
@@ -450,17 +450,11 @@ int main(int argc, char** argv) {
   TestBinaryPackedEncoding("Rand 0-10K", values, 100, 64);
 
   SnappyCodec snappy_codec;
-  Lz4Codec lz4_codec;
 
   TestPlainIntCompressed(&snappy_codec, values, 100, 1);
   TestPlainIntCompressed(&snappy_codec, values, 100, 16);
   TestPlainIntCompressed(&snappy_codec, values, 100, 32);
   TestPlainIntCompressed(&snappy_codec, values, 100, 64);
 
-  TestPlainIntCompressed(&lz4_codec, values, 100, 1);
-  TestPlainIntCompressed(&lz4_codec, values, 100, 16);
-  TestPlainIntCompressed(&lz4_codec, values, 100, 32);
-  TestPlainIntCompressed(&lz4_codec, values, 100, 64);
-
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/src/parquet/compression/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/compression/CMakeLists.txt b/src/parquet/compression/CMakeLists.txt
index f0ee110..eaf733a 100644
--- a/src/parquet/compression/CMakeLists.txt
+++ b/src/parquet/compression/CMakeLists.txt
@@ -17,12 +17,10 @@
 
 add_library(parquet_compression STATIC
   codec.cc
-  lz4-codec.cc
   snappy-codec.cc
   gzip-codec.cc
 )
 target_link_libraries(parquet_compression
-  lz4static
   snappystatic
   zlibstatic)
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/src/parquet/compression/codec-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/codec-test.cc b/src/parquet/compression/codec-test.cc
index d6ddbd6..417ecc7 100644
--- a/src/parquet/compression/codec-test.cc
+++ b/src/parquet/compression/codec-test.cc
@@ -73,10 +73,6 @@ TEST(TestCompressors, Snappy) {
   CheckCodec<SnappyCodec>();
 }
 
-TEST(TestCompressors, Lz4) {
-  CheckCodec<Lz4Codec>();
-}
-
 TEST(TestCompressors, GZip) {
   CheckCodec<GZipCodec>();
 }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/src/parquet/compression/codec.h
----------------------------------------------------------------------
diff --git a/src/parquet/compression/codec.h b/src/parquet/compression/codec.h
index ffbe563..ca823c5 100644
--- a/src/parquet/compression/codec.h
+++ b/src/parquet/compression/codec.h
@@ -59,20 +59,6 @@ class SnappyCodec : public Codec {
   virtual const char* name() const { return "snappy"; }
 };
 
-// Lz4 codec.
-class Lz4Codec : public Codec {
- public:
-  virtual void Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
-      uint8_t* output_buffer);
-
-  virtual int64_t Compress(int64_t input_len, const uint8_t* input,
-      int64_t output_buffer_len, uint8_t* output_buffer);
-
-  virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input);
-
-  virtual const char* name() const { return "lz4"; }
-};
-
 // GZip codec.
 class GZipCodec : public Codec {
  public:

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/src/parquet/compression/lz4-codec.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression/lz4-codec.cc b/src/parquet/compression/lz4-codec.cc
deleted file mode 100644
index 7acc1de..0000000
--- a/src/parquet/compression/lz4-codec.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <lz4.h>
-#include <cstdint>
-
-#include "parquet/compression/codec.h"
-#include "parquet/exception.h"
-
-namespace parquet {
-
-void Lz4Codec::Decompress(
-    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
-  int64_t n = LZ4_decompress_fast(reinterpret_cast<const char*>(input),
-      reinterpret_cast<char*>(output_buffer), output_len);
-  if (n != input_len) { throw ParquetException("Corrupt lz4 compressed data."); }
-}
-
-int64_t Lz4Codec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
-  return LZ4_compressBound(input_len);
-}
-
-int64_t Lz4Codec::Compress(int64_t input_len, const uint8_t* input,
-    int64_t output_buffer_len, uint8_t* output_buffer) {
-  return LZ4_compress(reinterpret_cast<const char*>(input),
-      reinterpret_cast<char*>(output_buffer), input_len);
-}
-
-}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh
index b637a36..dca586c 100755
--- a/thirdparty/build_thirdparty.sh
+++ b/thirdparty/build_thirdparty.sh
@@ -15,7 +15,6 @@ else
   # Allow passing specific libs to build on the command line
   for arg in "$*"; do
     case $arg in
-      "lz4")        F_LZ4=1 ;;
       "zlib")       F_ZLIB=1 ;;
       "gbenchmark") F_GBENCHMARK=1 ;;
       "gtest")      F_GTEST=1 ;;
@@ -86,13 +85,6 @@ if [ -n "$F_ALL" -o -n "$F_GBENCHMARK" ]; then
   make VERBOSE=1 install || { echo "make $GBENCHMARK_ERROR" ; exit 1; }
 fi
 
-# build lz4
-if [ -n "$F_ALL" -o -n "$F_LZ4" ]; then
-  cd $TP_DIR/$LZ4_BASEDIR/cmake_unofficial
-  CFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX $LZ4_DIR
-  make -j$PARALLEL install
-fi
-
 # build zlib
 if [ -n "$F_ALL" -o -n "$F_ZLIB" ]; then
   cd $TP_DIR/$ZLIB_BASEDIR

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/thirdparty/download_thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/download_thirdparty.sh b/thirdparty/download_thirdparty.sh
index a0bd14d..0674f77 100755
--- a/thirdparty/download_thirdparty.sh
+++ b/thirdparty/download_thirdparty.sh
@@ -14,11 +14,6 @@ download_extract_and_cleanup() {
 	rm $filename
 }
 
-if [ ! -d ${LZ4_BASEDIR} ]; then
-  echo "Fetching lz4"
-  download_extract_and_cleanup $LZ4_URL
-fi
-
 if [ ! -d ${SNAPPY_BASEDIR} ]; then
   echo "Fetching snappy"
   download_extract_and_cleanup $SNAPPY_URL

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/thirdparty/set_thirdparty_env.sh
----------------------------------------------------------------------
diff --git a/thirdparty/set_thirdparty_env.sh b/thirdparty/set_thirdparty_env.sh
index 52b705d..80715ef 100644
--- a/thirdparty/set_thirdparty_env.sh
+++ b/thirdparty/set_thirdparty_env.sh
@@ -8,7 +8,6 @@ if [ -z "$THIRDPARTY_DIR" ]; then
 fi
 
 export SNAPPY_HOME=$THIRDPARTY_DIR/installed
-export LZ4_HOME=$THIRDPARTY_DIR/installed
 export ZLIB_HOME=$THIRDPARTY_DIR/installed
 # build script doesn't support building thrift on OSX
 if [ "$(uname)" != "Darwin" ]; then

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/06d4280e/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/versions.sh b/thirdparty/versions.sh
index 8380580..b56262a 100755
--- a/thirdparty/versions.sh
+++ b/thirdparty/versions.sh
@@ -1,7 +1,3 @@
-LZ4_VERSION="r131"
-LZ4_URL="https://github.com/Cyan4973/lz4/archive/${LZ4_VERSION}.tar.gz"
-LZ4_BASEDIR=lz4-$LZ4_VERSION
-
 SNAPPY_VERSION=1.1.3
 SNAPPY_URL="https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz"
 SNAPPY_BASEDIR=snappy-$SNAPPY_VERSION
@@ -10,7 +6,6 @@ THRIFT_VERSION=0.9.1
 THRIFT_URL="http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz"
 THRIFT_BASEDIR=thrift-$THRIFT_VERSION
 
-
 GBENCHMARK_VERSION=1.0.0
 GBENCHMARK_URL="https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
 GBENCHMARK_BASEDIR=benchmark-$GBENCHMARK_VERSION


Mime
View raw message