arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1335: [C++] Add offset to PrimitiveArray::raw_values to make consistent with other raw_values
Date Tue, 08 Aug 2017 03:38:48 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 03dcce446 -> 939957f33


ARROW-1335: [C++] Add offset to PrimitiveArray::raw_values to make consistent with other raw_values

This is an API change, but fixes an existing inconsistency that was the source of several
bugs that had gone unnoticed because they were only being tested with code having 0 offset
(i.e. unsliced). We'll need a corresponding patch in parquet-cpp

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #949 from wesm/ARROW-1335 and squashes the following commits:

10431ebf [Wes McKinney] Use raw_values in more places
3c96eb4a [Wes McKinney] Add offset to PrimitiveArray::raw_values to make consistent with other
raw_values functions


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/939957f3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/939957f3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/939957f3

Branch: refs/heads/master
Commit: 939957f33ed0dd02013917b366ff85eb857c3947
Parents: 03dcce4
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Mon Aug 7 23:38:42 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon Aug 7 23:38:42 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/array.cc                  |  5 ++
 cpp/src/arrow/array.h                   | 10 ++--
 cpp/src/arrow/compare.cc                | 16 +++---
 cpp/src/arrow/python/arrow_to_pandas.cc | 74 +++++++++++++---------------
 4 files changed, 53 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/939957f3/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index ab0be7a..637eb24 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -159,6 +159,11 @@ PrimitiveArray::PrimitiveArray(const std::shared_ptr<DataType>&
type, int64_t le
       std::make_shared<ArrayData>(type, length, std::move(buffers), null_count, offset));
 }
 
+const uint8_t* PrimitiveArray::raw_values() const {
+  return raw_values_ +
+         offset() * static_cast<const FixedWidthType&>(*type()).bit_width() / 8;
+}
+
 template <typename T>
 NumericArray<T>::NumericArray(const std::shared_ptr<internal::ArrayData>&
data)
     : PrimitiveArray(data) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/939957f3/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index a853f2b..777fbe0 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -292,8 +292,8 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray {
   /// Does not account for any slice offset
   std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
 
-  /// Does not account for any slice offset
-  const uint8_t* raw_values() const { return raw_values_; }
+  /// \brief Return pointer to start of raw data
+  const uint8_t* raw_values() const;
 
  protected:
   PrimitiveArray() {}
@@ -521,7 +521,7 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
 
   int32_t byte_width() const { return byte_width_; }
 
-  const uint8_t* raw_values() const { return raw_values_; }
+  const uint8_t* raw_values() const { return raw_values_ + byte_width_ * data_->offset;
}
 
   std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const override;
 
@@ -567,7 +567,9 @@ class ARROW_EXPORT DecimalArray : public FlatArray {
   int32_t byte_width() const {
     return static_cast<const DecimalType&>(*type()).byte_width();
   }
-  const uint8_t* raw_values() const { return raw_values_; }
+
+  /// \brief Return pointer to value data, accounting for any offset
+  const uint8_t* raw_values() const { return raw_values_ + byte_width() * data_->offset;
}
 
  private:
   void SetData(const std::shared_ptr<internal::ArrayData>& data);

http://git-wip-us.apache.org/repos/asf/arrow/blob/939957f3/cpp/src/arrow/compare.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 3a4a400..c01f190 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -231,11 +231,11 @@ class RangeEqualsVisitor {
     const uint8_t* right_data = nullptr;
 
     if (left.values()) {
-      left_data = left.raw_values() + left.offset() * width;
+      left_data = left.raw_values();
     }
 
     if (right.values()) {
-      right_data = right.raw_values() + right.offset() * width;
+      right_data = right.raw_values();
     }
 
     for (int64_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
@@ -265,11 +265,11 @@ class RangeEqualsVisitor {
     const uint8_t* right_data = nullptr;
 
     if (left.values()) {
-      left_data = left.raw_values() + left.offset() * width;
+      left_data = left.raw_values();
     }
 
     if (right.values()) {
-      right_data = right.raw_values() + right.offset() * width;
+      right_data = right.raw_values();
     }
 
     for (int64_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
@@ -352,10 +352,10 @@ static bool IsEqualPrimitive(const PrimitiveArray& left, const PrimitiveArray&
r
   const uint8_t* right_data = nullptr;
 
   if (left.values()) {
-    left_data = left.values()->data() + left.offset() * byte_width;
+    left_data = left.raw_values();
   }
   if (right.values()) {
-    right_data = right.values()->data() + right.offset() * byte_width;
+    right_data = right.raw_values();
   }
 
   if (left.null_count() > 0) {
@@ -399,10 +399,10 @@ static bool IsEqualDecimal(const DecimalArray& left, const DecimalArray&
right)
   const uint8_t* right_data = nullptr;
 
   if (left.values()) {
-    left_data = left.values()->data();
+    left_data = left.raw_values();
   }
   if (right.values()) {
-    right_data = right.values()->data();
+    right_data = right.raw_values();
   }
 
   const int32_t byte_width = left.byte_width();

http://git-wip-us.apache.org/repos/asf/arrow/blob/939957f3/cpp/src/arrow/python/arrow_to_pandas.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 86f82fd..8c769ee 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -266,13 +266,12 @@ class PandasBlock {
 template <typename T>
 inline void ConvertIntegerWithNulls(const ChunkedArray& data, double* out_values) {
   for (int c = 0; c < data.num_chunks(); c++) {
-    const std::shared_ptr<Array> arr = data.chunk(c);
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const T*>(prim_arr->raw_values());
+    const auto& arr = static_cast<const PrimitiveArray&>(*data.chunk(c));
+    auto in_values = reinterpret_cast<const T*>(arr.raw_values());
     // Upcast to double, set NaN as appropriate
 
-    for (int i = 0; i < arr->length(); ++i) {
-      *out_values++ = prim_arr->IsNull(i) ? NAN : static_cast<double>(in_values[i]);
+    for (int i = 0; i < arr.length(); ++i) {
+      *out_values++ = arr.IsNull(i) ? NAN : static_cast<double>(in_values[i]);
     }
   }
 }
@@ -280,21 +279,19 @@ inline void ConvertIntegerWithNulls(const ChunkedArray& data, double*
out_values
 template <typename T>
 inline void ConvertIntegerNoNullsSameType(const ChunkedArray& data, T* out_values) {
   for (int c = 0; c < data.num_chunks(); c++) {
-    const std::shared_ptr<Array> arr = data.chunk(c);
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const T*>(prim_arr->raw_values());
-    memcpy(out_values, in_values, sizeof(T) * arr->length());
-    out_values += arr->length();
+    const auto& arr = static_cast<const PrimitiveArray&>(*data.chunk(c));
+    auto in_values = reinterpret_cast<const T*>(arr.raw_values());
+    memcpy(out_values, in_values, sizeof(T) * arr.length());
+    out_values += arr.length();
   }
 }
 
 template <typename InType, typename OutType>
 inline void ConvertIntegerNoNullsCast(const ChunkedArray& data, OutType* out_values)
{
   for (int c = 0; c < data.num_chunks(); c++) {
-    const std::shared_ptr<Array> arr = data.chunk(c);
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const InType*>(prim_arr->raw_values());
-    for (int64_t i = 0; i < arr->length(); ++i) {
+    const auto& arr = static_cast<const PrimitiveArray&>(*data.chunk(c));
+    auto in_values = reinterpret_cast<const InType*>(arr.raw_values());
+    for (int64_t i = 0; i < arr.length(); ++i) {
       *out_values = in_values[i];
     }
   }
@@ -520,19 +517,18 @@ inline Status ConvertListsLike(const std::shared_ptr<Column>&
col,
 template <typename T>
 inline void ConvertNumericNullable(const ChunkedArray& data, T na_value, T* out_values)
{
   for (int c = 0; c < data.num_chunks(); c++) {
-    const std::shared_ptr<Array> arr = data.chunk(c);
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const T*>(prim_arr->raw_values());
+    const auto& arr = static_cast<const PrimitiveArray&>(*data.chunk(c));
+    auto in_values = reinterpret_cast<const T*>(arr.raw_values());
 
-    const uint8_t* valid_bits = arr->null_bitmap_data();
+    const uint8_t* valid_bits = arr.null_bitmap_data();
 
-    if (arr->null_count() > 0) {
-      for (int64_t i = 0; i < arr->length(); ++i) {
+    if (arr.null_count() > 0) {
+      for (int64_t i = 0; i < arr.length(); ++i) {
         *out_values++ = BitUtil::BitNotSet(valid_bits, i) ? na_value : in_values[i];
       }
     } else {
-      memcpy(out_values, in_values, sizeof(T) * arr->length());
-      out_values += arr->length();
+      memcpy(out_values, in_values, sizeof(T) * arr.length());
+      out_values += arr.length();
     }
   }
 }
@@ -541,12 +537,11 @@ template <typename InType, typename OutType>
 inline void ConvertNumericNullableCast(const ChunkedArray& data, OutType na_value,
                                        OutType* out_values) {
   for (int c = 0; c < data.num_chunks(); c++) {
-    const std::shared_ptr<Array> arr = data.chunk(c);
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const InType*>(prim_arr->raw_values());
+    const auto& arr = static_cast<const PrimitiveArray&>(*data.chunk(c));
+    auto in_values = reinterpret_cast<const InType*>(arr.raw_values());
 
-    for (int64_t i = 0; i < arr->length(); ++i) {
-      *out_values++ = arr->IsNull(i) ? na_value : static_cast<OutType>(in_values[i]);
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      *out_values++ = arr.IsNull(i) ? na_value : static_cast<OutType>(in_values[i]);
     }
   }
 }
@@ -554,13 +549,12 @@ inline void ConvertNumericNullableCast(const ChunkedArray& data,
OutType na_valu
 template <typename InType, int64_t SHIFT>
 inline void ConvertDatetimeNanos(const ChunkedArray& data, int64_t* out_values) {
   for (int c = 0; c < data.num_chunks(); c++) {
-    const std::shared_ptr<Array> arr = data.chunk(c);
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const InType*>(prim_arr->raw_values());
+    const auto& arr = static_cast<const PrimitiveArray&>(*data.chunk(c));
+    auto in_values = reinterpret_cast<const InType*>(arr.raw_values());
 
-    for (int64_t i = 0; i < arr->length(); ++i) {
-      *out_values++ = arr->IsNull(i) ? kPandasTimestampNull
-                                     : (static_cast<int64_t>(in_values[i]) * SHIFT);
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      *out_values++ = arr.IsNull(i) ? kPandasTimestampNull
+                                    : (static_cast<int64_t>(in_values[i]) * SHIFT);
     }
   }
 }
@@ -1004,6 +998,7 @@ class CategoricalBlock : public PandasBlock {
     for (int c = 0; c < data.num_chunks(); c++) {
       const std::shared_ptr<Array> arr = data.chunk(c);
       const auto& dict_arr = static_cast<const DictionaryArray&>(*arr);
+
       const auto& indices = static_cast<const PrimitiveArray&>(*dict_arr.indices());
       auto in_values = reinterpret_cast<const T*>(indices.raw_values());
 
@@ -1386,8 +1381,8 @@ class ArrowDeserializer {
   Status ConvertValuesZeroCopy(int npy_type, std::shared_ptr<Array> arr) {
     typedef typename internal::arrow_traits<TYPE>::T T;
 
-    auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-    auto in_values = reinterpret_cast<const T*>(prim_arr->raw_values());
+    const auto& prim_arr = static_cast<const PrimitiveArray&>(*arr);
+    auto in_values = reinterpret_cast<const T*>(prim_arr.raw_values());
 
     // Zero-Copy. We can pass the data pointer directly to NumPy.
     void* data = const_cast<T*>(in_values);
@@ -1461,12 +1456,11 @@ class ArrowDeserializer {
     constexpr int64_t kShift = traits::npy_shift;
 
     for (int c = 0; c < data_.num_chunks(); c++) {
-      const std::shared_ptr<Array> arr = data_.chunk(c);
-      auto prim_arr = static_cast<PrimitiveArray*>(arr.get());
-      auto in_values = reinterpret_cast<const T*>(prim_arr->raw_values());
+      const auto& arr = static_cast<const PrimitiveArray&>(*data_.chunk(c));
+      auto in_values = reinterpret_cast<const T*>(arr.raw_values());
 
-      for (int64_t i = 0; i < arr->length(); ++i) {
-        *out_values++ = arr->IsNull(i) ? na_value : in_values[i] / kShift;
+      for (int64_t i = 0; i < arr.length(); ++i) {
+        *out_values++ = arr.IsNull(i) ? na_value : in_values[i] / kShift;
       }
     }
     return Status::OK();


Mime
View raw message