arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [1/2] arrow git commit: ARROW-1142: [C++] Port over compression toolchain and interfaces from parquet-cpp, use Arrow-style error handling
Date Fri, 23 Jun 2017 23:07:01 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 1514016a7 -> 98f7cac6e


http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/compression.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
new file mode 100644
index 0000000..6886d04
--- /dev/null
+++ b/cpp/src/arrow/util/compression.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_COMPRESSION_H
+#define ARROW_UTIL_COMPRESSION_H
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+struct Compression {
+  enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI };
+};
+
+class ARROW_EXPORT Codec {
+ public:
+  virtual ~Codec();
+
+  static Status Create(Compression::type codec, std::unique_ptr<Codec>* out);
+
+  virtual Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) = 0;
+
+  virtual Status Compress(int64_t input_len, const uint8_t* input,
+      int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) = 0;
+
+  virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0;
+
+  virtual const char* name() const = 0;
+};
+
+// Snappy codec.
+class ARROW_EXPORT SnappyCodec : public Codec {
+ public:
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  const char* name() const override { return "snappy"; }
+};
+
+// Brotli codec.
+class ARROW_EXPORT BrotliCodec : public Codec {
+ public:
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  const char* name() const override { return "brotli"; }
+};
+
+// GZip codec.
+class ARROW_EXPORT GZipCodec : public Codec {
+ public:
+  /// Compression formats supported by the zlib library
+  enum Format {
+    ZLIB,
+    DEFLATE,
+    GZIP,
+  };
+
+  explicit GZipCodec(Format format = GZIP);
+  virtual ~GZipCodec();
+
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  const char* name() const override;
+
+ private:
+  // The gzip compressor is stateful
+  class GZipCodecImpl;
+  std::unique_ptr<GZipCodecImpl> impl_;
+};
+
+}  // namespace arrow
+
+#endif

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/arrow/util/logging.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 49f1699..8a929da 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -39,9 +39,10 @@ namespace arrow {
 #define ARROW_LOG_INTERNAL(level) ::arrow::internal::CerrLog(level)
 #define ARROW_LOG(level) ARROW_LOG_INTERNAL(ARROW_##level)
 
-#define ARROW_CHECK(condition)                               \
-  (condition) ? 0 : ::arrow::internal::FatalLog(ARROW_FATAL) \
-                        << __FILE__ << __LINE__ << " Check failed: " #condition " "
+#define ARROW_CHECK(condition)                           \
+  (condition) ? 0                                        \
+              : ::arrow::internal::FatalLog(ARROW_FATAL) \
+                    << __FILE__ << __LINE__ << " Check failed: " #condition " "
 
 #ifdef NDEBUG
 #define ARROW_DFATAL ARROW_WARNING

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/plasma/malloc.cc
----------------------------------------------------------------------
diff --git a/cpp/src/plasma/malloc.cc b/cpp/src/plasma/malloc.cc
index e7ffd1a..97c9a16 100644
--- a/cpp/src/plasma/malloc.cc
+++ b/cpp/src/plasma/malloc.cc
@@ -42,7 +42,7 @@ int fake_munmap(void*, int64_t);
 #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
 #define DEFAULT_GRANULARITY ((size_t)128U * 1024U)
 
-#include "thirdparty/dlmalloc.c"
+#include "thirdparty/dlmalloc.c"  // NOLINT
 
 #undef MMAP
 #undef MUNMAP

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/cpp/src/plasma/test/client_tests.cc
----------------------------------------------------------------------
diff --git a/cpp/src/plasma/test/client_tests.cc b/cpp/src/plasma/test/client_tests.cc
index dc45773..29b5b13 100644
--- a/cpp/src/plasma/test/client_tests.cc
+++ b/cpp/src/plasma/test/client_tests.cc
@@ -29,7 +29,7 @@
 #include "plasma/plasma.h"
 #include "plasma/protocol.h"
 
-std::string g_test_executable;
+std::string g_test_executable;  // NOLINT
 
 class TestPlasmaStore : public ::testing::Test {
  public:

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/dev/release/check-rat-report.py
----------------------------------------------------------------------
diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py
new file mode 100644
index 0000000..e30d72b
--- /dev/null
+++ b/dev/release/check-rat-report.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+##############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+##############################################################################
+import fnmatch
+import re
+import sys
+import xml.etree.ElementTree as ET
+
+if len(sys.argv) != 3:
+    sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
+                     sys.argv[0])
+    sys.exit(1)
+
+exclude_globs_filename = sys.argv[1]
+xml_filename = sys.argv[2]
+
+globs = [line.strip() for line in open(exclude_globs_filename, "r")]
+
+tree = ET.parse(xml_filename)
+root = tree.getroot()
+resources = root.findall('resource')
+
+all_ok = True
+for r in resources:
+    approvals = r.findall('license-approval')
+    if not approvals or approvals[0].attrib['name'] == 'true':
+        continue
+    clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
+    excluded = False
+    for g in globs:
+        if fnmatch.fnmatch(clean_name, g):
+            excluded = True
+            break
+    if not excluded:
+        sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % (
+            clean_name, r.attrib['name'], approvals[0].attrib['name']))
+        all_ok = False
+
+if not all_ok:
+    sys.exit(1)
+
+print('OK')
+sys.exit(0)

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/dev/release/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
new file mode 100644
index 0000000..286793e
--- /dev/null
+++ b/dev/release/rat_exclude_files.txt
@@ -0,0 +1,66 @@
+*.gitignore
+*_generated.h
+*.json
+cpp/src/arrow/io/mman.h
+cpp/src/arrow/util/random.h
+cpp/src/arrow/status.cc
+cpp/src/arrow/status.h
+cpp/build-support/asan_symbolize.py
+cpp/build-support/cpplint.py
+cpp/cmake_modules/BuildUtils.cmake
+cpp/cmake_modules/FindPythonLibsNew.cmake
+cpp/cmake_modules/FindNumPy.cmake
+cpp/cmake_modules/SetupCxxFlags.cmake
+cpp/cmake_modules/SnappyCMakeLists.txt
+cpp/cmake_modules/SnappyConfig.h
+cpp/cmake_modules/CompilerInfo.cmake
+cpp/src/plasma/thirdparty/ae/ae.c
+cpp/src/plasma/thirdparty/ae/ae.h
+cpp/src/plasma/thirdparty/ae/ae_epoll.c
+cpp/src/plasma/thirdparty/ae/ae_evport.c
+cpp/src/plasma/thirdparty/ae/ae_kqueue.c
+cpp/src/plasma/thirdparty/ae/ae_select.c
+cpp/src/plasma/thirdparty/ae/config.h
+cpp/src/plasma/thirdparty/ae/zmalloc.h
+cpp/src/plasma/thirdparty/dlmalloc.c
+cpp/src/plasma/thirdparty/xxhash.cc
+cpp/src/plasma/thirdparty/xxhash.h
+dev/release/rat_exclude_files.txt
+js/.npmignore
+python/cmake_modules/BuildUtils.cmake
+python/cmake_modules/FindPythonLibsNew.cmake
+python/cmake_modules/FindNumPy.cmake
+python/cmake_modules/SetupCxxFlags.cmake
+python/cmake_modules/CompilerInfo.cmake
+python/doc/requirements.txt
+python/MANIFEST.in
+python/pyarrow/includes/__init__.pxd
+python/pyarrow/tests/__init__.py
+python/requirements.txt
+pax_global_header
+MANIFEST.in
+__init__.pxd
+__init__.py
+requirements.txt
+version
+*.m4
+configure
+config.sub
+config.h.in
+compile
+missing
+install-sh
+config.guess
+depcomp
+ltmain.sh
+arrow-glib.types
+arrow-glib-sections.txt
+arrow-glib-overrides.txt
+gtk-doc.make
+*.html
+*.sgml
+*.css
+*.png
+*.svg
+*.devhelp2
+*.scss

http://git-wip-us.apache.org/repos/asf/arrow/blob/98f7cac6/dev/release/run-rat.sh
----------------------------------------------------------------------
diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh
index 757604f..53a322a 100755
--- a/dev/release/run-rat.sh
+++ b/dev/release/run-rat.sh
@@ -21,65 +21,15 @@
 # download apache rat
 curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/0.12/apache-rat-0.12.jar > apache-rat-0.12.jar
 
-RAT="java -jar apache-rat-0.12.jar -d "
+RAT="java -jar apache-rat-0.12.jar -x "
+
+RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 
 # generate the rat report
-$RAT $1 \
-  -e ".*" \
-  -e mman.h \
-  -e "*_generated.h" \
-  -e "*.json" \
-  -e random.h \
-  -e status.cc \
-  -e status.h \
-  -e asan_symbolize.py \
-  -e cpplint.py \
-  -e BuildUtils.cmake \
-  -e FindPythonLibsNew.cmake \
-  -e FindNumPy.cmake \
-  -e SetupCxxFlags.cmake \
-  -e CompilerInfo.cmake \
-  -e pax_global_header \
-  -e MANIFEST.in \
-  -e __init__.pxd \
-  -e __init__.py \
-  -e requirements.txt \
-  -e version \
-  -e "*.m4" \
-  -e configure \
-  -e config.sub \
-  -e config.h.in \
-  -e compile \
-  -e missing \
-  -e install-sh \
-  -e config.guess \
-  -e depcomp \
-  -e ltmain.sh \
-  -e arrow-glib.types \
-  -e arrow-glib-sections.txt \
-  -e arrow-glib-overrides.txt \
-  -e gtk-doc.make \
-  -e ae.c \
-  -e ae.h \
-  -e ae_epoll.c \
-  -e ae_evport.c \
-  -e ae_kqueue.c \
-  -e ae_select.c \
-  -e config.h \
-  -e zmalloc.h \
-  -e dlmalloc.c \
-  -e xxhash.cc \
-  -e xxhash.h \
-  -e "*.html" \
-  -e "*.sgml" \
-  -e "*.css" \
-  -e "*.png" \
-  -e "*.svg" \
-  -e "*.devhelp2" \
-  -e "*.scss" \
-  > rat.txt
-cat rat.txt
-UNAPPROVED=`cat rat.txt  | grep "Unknown Licenses" | head -n 1 | cut -d " " -f 1`
+$RAT $1 > rat.txt
+python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt
+cat filtered_rat.txt
+UNAPPROVED=`cat filtered_rat.txt  | grep "NOT APPROVED" | wc -l`
 
 if [ "0" -eq "${UNAPPROVED}" ]; then
   echo "No unapproved licenses"


Mime
View raw message