hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From whe...@apache.org
Subject [47/56] [abbrv] hadoop git commit: HDFS-8759. Implement remote block reader in libhdfspp. Contributed by Haohui Mai.
Date Wed, 07 Oct 2015 07:20:13 GMT
HDFS-8759. Implement remote block reader in libhdfspp. Contributed by Haohui Mai.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a7dbcdfe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a7dbcdfe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a7dbcdfe

Branch: refs/heads/HDFS-8707
Commit: a7dbcdfeb1d4d67ed680bd4216baadc27b43cae1
Parents: 816dbf9
Author: Haohui Mai <wheat9@apache.org>
Authored: Fri Jul 10 16:50:45 2015 -0700
Committer: Haohui Mai <wheat9@apache.org>
Committed: Wed Oct 7 00:17:11 2015 -0700

----------------------------------------------------------------------
 .../native/libhdfspp/include/libhdfspp/status.h |   4 +-
 .../main/native/libhdfspp/lib/CMakeLists.txt    |  20 ++
 .../native/libhdfspp/lib/common/CMakeLists.txt  |   1 +
 .../main/native/libhdfspp/lib/common/base64.cc  |  71 ++++
 .../main/native/libhdfspp/lib/common/status.cc  |  66 ++++
 .../native/libhdfspp/lib/reader/CMakeLists.txt  |  20 ++
 .../native/libhdfspp/lib/reader/block_reader.h  | 114 +++++++
 .../native/libhdfspp/lib/reader/datatransfer.h  |  35 ++
 .../libhdfspp/lib/reader/remote_block_reader.cc |  46 +++
 .../lib/reader/remote_block_reader_impl.h       | 342 +++++++++++++++++++
 10 files changed, 718 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/include/libhdfspp/status.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/include/libhdfspp/status.h
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/include/libhdfspp/status.h
index 9436c8b..d2ef005 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/include/libhdfspp/status.h
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/include/libhdfspp/status.h
@@ -45,6 +45,8 @@ class Status {
   { return Status(kUnimplemented, ""); }
   static Status Exception(const char *expception_class_name, const char *error_message)
   { return Status(kException, expception_class_name, error_message); }
+  static Status Error(const char *error_message)
+  { return Exception("Exception", error_message); }
 
   // Returns true iff the status indicates success.
   bool ok() const { return (state_ == NULL); }
@@ -71,7 +73,7 @@ class Status {
     kInvalidArgument = static_cast<unsigned>(std::errc::invalid_argument),
     kResourceUnavailable = static_cast<unsigned>(std::errc::resource_unavailable_try_again),
     kUnimplemented = static_cast<unsigned>(std::errc::function_not_supported),
-    kException = 256,
+    kException = 255,
   };
 
   explicit Status(int code, const char *msg1, const char *msg2);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/CMakeLists.txt
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/CMakeLists.txt
index 7458453..e77942b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/CMakeLists.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/CMakeLists.txt
@@ -1,2 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(common)
+add_subdirectory(reader)
 add_subdirectory(rpc)
 add_subdirectory(proto)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt
new file mode 100644
index 0000000..570d0ac
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt
@@ -0,0 +1 @@
+add_library(common base64.cc status.cc)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/base64.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/base64.cc
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/base64.cc
new file mode 100644
index 0000000..f98fec5
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/base64.cc
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util.h"
+
+#include <array>
+#include <functional>
+#include <algorithm>
+
+namespace hdfs {
+
+std::string Base64Encode(const std::string &src) {
+  static const char kDictionary[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                    "abcdefghijklmnopqrstuvwxyz"
+                                    "0123456789+/";
+
+  int encoded_size = (src.size() + 2) / 3 * 4;
+  std::string dst;
+  dst.reserve(encoded_size);
+
+  size_t i = 0;
+  while (i + 3 < src.length()) {
+    const char *s = &src[i];
+    const int r[4] = {s[0] >> 2, ((s[0] << 4) | (s[1] >> 4)) & 0x3f,
+                      ((s[1] << 2) | (s[2] >> 6)) & 0x3f, s[2] & 0x3f};
+
+    std::transform(r, r + sizeof(r) / sizeof(int), std::back_inserter(dst),
+                   [&r](unsigned char v) { return kDictionary[v]; });
+    i += 3;
+  }
+
+  size_t remained = src.length() - i;
+  const char *s = &src[i];
+
+  switch (remained) {
+  case 0:
+    break;
+  case 1: {
+    char padding[4] = {kDictionary[s[0] >> 2], kDictionary[(s[0] << 4) &
0x3f],
+                       '=', '='};
+    dst.append(padding, sizeof(padding));
+  } break;
+  case 2: {
+    char padding[4] = {kDictionary[src[i] >> 2],
+                       kDictionary[((s[0] << 4) | (s[1] >> 4)) & 0x3f],
+                       kDictionary[(s[1] << 2) & 0x3f], '='};
+    dst.append(padding, sizeof(padding));
+  } break;
+  default:
+    assert("Unreachable");
+    break;
+  }
+  return dst;
+}
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/status.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/status.cc
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/status.cc
new file mode 100644
index 0000000..66cfa1c
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/common/status.cc
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "libhdfspp/status.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace hdfs {
+
+Status::Status(int code, const char *msg1)
+    : state_(ConstructState(code, msg1, nullptr)) {}
+
+Status::Status(int code, const char *msg1, const char *msg2)
+    : state_(ConstructState(code, msg1, msg2)) {}
+
+const char *Status::ConstructState(int code, const char *msg1,
+                                   const char *msg2) {
+  assert(code != kOk);
+  const uint32_t len1 = strlen(msg1);
+  const uint32_t len2 = msg2 ? strlen(msg2) : 0;
+  const uint32_t size = len1 + (len2 ? (2 + len2) : 0);
+  char *result = new char[size + 8 + 2];
+  *reinterpret_cast<uint32_t *>(result) = size;
+  *reinterpret_cast<uint32_t *>(result + 4) = code;
+  memcpy(result + 8, msg1, len1);
+  if (len2) {
+    result[8 + len1] = ':';
+    result[9 + len1] = ' ';
+    memcpy(result + 10 + len1, msg2, len2);
+  }
+  return result;
+}
+
+std::string Status::ToString() const {
+  if (!state_) {
+    return "OK";
+  } else {
+    uint32_t length = *reinterpret_cast<const uint32_t *>(state_);
+    return std::string(state_ + 8, length);
+  }
+}
+
+const char *Status::CopyState(const char *state) {
+  uint32_t size;
+  memcpy(&size, state, sizeof(size));
+  char *result = new char[size + 8];
+  memcpy(result, state, size + 8);
+  return result;
+}
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/CMakeLists.txt
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/CMakeLists.txt
new file mode 100644
index 0000000..65ec108
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/CMakeLists.txt
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_library(reader remote_block_reader.cc)
+add_dependencies(reader proto)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/block_reader.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/block_reader.h
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/block_reader.h
new file mode 100644
index 0000000..81636b9
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/block_reader.h
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BLOCK_READER_H_
+#define BLOCK_READER_H_
+
+#include "libhdfspp/status.h"
+#include "datatransfer.pb.h"
+
+#include <memory>
+
+namespace hdfs {
+
+struct CacheStrategy {
+  bool drop_behind_specified;
+  bool drop_behind;
+  bool read_ahead_specified;
+  unsigned long long read_ahead;
+  CacheStrategy()
+      : drop_behind_specified(false), drop_behind(false),
+        read_ahead_specified(false), read_ahead(false) {}
+};
+
+enum DropBehindStrategy {
+  kUnspecified = 0,
+  kEnableDropBehind = 1,
+  kDisableDropBehind = 2,
+};
+
+enum EncryptionScheme {
+  kNone = 0,
+  kAESCTRNoPadding = 1,
+};
+
+struct BlockReaderOptions {
+  bool verify_checksum;
+  CacheStrategy cache_strategy;
+  EncryptionScheme encryption_scheme;
+
+  BlockReaderOptions()
+      : verify_checksum(true), encryption_scheme(EncryptionScheme::kNone) {}
+};
+
+template <class Stream>
+class RemoteBlockReader
+    : public std::enable_shared_from_this<RemoteBlockReader<Stream>> {
+public:
+  explicit RemoteBlockReader(const BlockReaderOptions &options, Stream *stream)
+      : stream_(stream), state_(kOpen), options_(options),
+        chunk_padding_bytes_(0) {}
+
+  template <class MutableBufferSequence, class ReadHandler>
+  void async_read_some(const MutableBufferSequence &buffers,
+                       const ReadHandler &handler);
+
+  template <class MutableBufferSequence>
+  size_t read_some(const MutableBufferSequence &buffers, Status *status);
+
+  Status connect(const std::string &client_name,
+                 const hadoop::common::TokenProto *token,
+                 const hadoop::hdfs::ExtendedBlockProto *block, uint64_t length,
+                 uint64_t offset);
+
+  template <class ConnectHandler>
+  void async_connect(const std::string &client_name,
+                     const hadoop::common::TokenProto *token,
+                     const hadoop::hdfs::ExtendedBlockProto *block,
+                     uint64_t length, uint64_t offset,
+                     const ConnectHandler &handler);
+
+private:
+  struct ReadPacketHeader;
+  struct ReadChecksum;
+  struct ReadPadding;
+  template <class MutableBufferSequence> struct ReadData;
+  struct AckRead;
+  enum State {
+    kOpen,
+    kReadPacketHeader,
+    kReadChecksum,
+    kReadPadding,
+    kReadData,
+    kFinished,
+  };
+
+  Stream *stream_;
+  hadoop::hdfs::PacketHeaderProto header_;
+  State state_;
+  BlockReaderOptions options_;
+  size_t packet_len_;
+  int packet_data_read_bytes_;
+  int chunk_padding_bytes_;
+  long long bytes_to_read_;
+  std::vector<char> checksum_;
+};
+}
+
+#include "remote_block_reader_impl.h"
+
+#endif

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/datatransfer.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/datatransfer.h
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/datatransfer.h
new file mode 100644
index 0000000..d22f5e8
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/datatransfer.h
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef COMMON_DATA_TRANSFER_H_
+#define COMMON_DATA_TRANSFER_H_
+
+namespace hdfs {
+
+enum {
+  kDataTransferVersion = 28,
+  kDataTransferSasl = 0xdeadbeef,
+};
+
+enum Operation {
+  kWriteBlock = 80,
+  kReadBlock  = 81,
+};
+
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader.cc
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader.cc
new file mode 100644
index 0000000..68bc4ee
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader.cc
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "block_reader.h"
+
+namespace hdfs {
+
+hadoop::hdfs::OpReadBlockProto
+ReadBlockProto(const std::string &client_name, bool verify_checksum,
+               const hadoop::common::TokenProto *token,
+               const hadoop::hdfs::ExtendedBlockProto *block, uint64_t length,
+               uint64_t offset) {
+  using namespace hadoop::hdfs;
+  using namespace hadoop::common;
+  BaseHeaderProto *base_h = new BaseHeaderProto();
+  base_h->set_allocated_block(new ExtendedBlockProto(*block));
+  if (token) {
+    base_h->set_allocated_token(new TokenProto(*token));
+  }
+  ClientOperationHeaderProto *h = new ClientOperationHeaderProto();
+  h->set_clientname(client_name);
+  h->set_allocated_baseheader(base_h);
+
+  OpReadBlockProto p;
+  p.set_allocated_header(h);
+  p.set_offset(offset);
+  p.set_len(length);
+  p.set_sendchecksums(verify_checksum);
+  // TODO: p.set_allocated_cachingstrategy();
+  return p;
+}
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a7dbcdfe/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader_impl.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader_impl.h
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader_impl.h
new file mode 100644
index 0000000..68ea6ad
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/native/libhdfspp/lib/reader/remote_block_reader_impl.h
@@ -0,0 +1,342 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_READER_REMOTE_BLOCK_READER_IMPL_H_
+#define LIBHDFSPP_READER_REMOTE_BLOCK_READER_IMPL_H_
+
+#include "datatransfer.h"
+#include "common/continuation/asio.h"
+#include "common/continuation/protobuf.h"
+
+#include <asio/buffers_iterator.hpp>
+#include <asio/streambuf.hpp>
+#include <asio/write.hpp>
+
+#include <arpa/inet.h>
+
+#include <future>
+
+namespace hdfs {
+
+hadoop::hdfs::OpReadBlockProto
+ReadBlockProto(const std::string &client_name, bool verify_checksum,
+               const hadoop::common::TokenProto *token,
+               const hadoop::hdfs::ExtendedBlockProto *block, uint64_t length,
+               uint64_t offset);
+
+template <class Stream>
+template <class ConnectHandler>
+void RemoteBlockReader<Stream>::async_connect(
+    const std::string &client_name, const hadoop::common::TokenProto *token,
+    const hadoop::hdfs::ExtendedBlockProto *block, uint64_t length,
+    uint64_t offset, const ConnectHandler &handler) {
+  // The total number of bytes that we need to transfer from the DN is
+  // the amount that the user wants (bytesToRead), plus the padding at
+  // the beginning in order to chunk-align. Note that the DN may elect
+  // to send more than this amount if the read starts/ends mid-chunk.
+  bytes_to_read_ = length;
+
+  struct State {
+    std::string header;
+    hadoop::hdfs::OpReadBlockProto request;
+    hadoop::hdfs::BlockOpResponseProto response;
+  };
+
+  auto m = continuation::Pipeline<State>::Create();
+  State *s = &m->state();
+
+  s->header.insert(s->header.begin(),
+                   {0, kDataTransferVersion, Operation::kReadBlock});
+  s->request = std::move(ReadBlockProto(client_name, options_.verify_checksum,
+                                        token, block, length, offset));
+
+  auto read_pb_message =
+      new continuation::ReadDelimitedPBMessageContinuation<Stream, 16384>(
+          stream_, &s->response);
+
+  m->Push(continuation::Write(stream_, asio::buffer(s->header)))
+      .Push(continuation::WriteDelimitedPBMessage(stream_, &s->request))
+      .Push(read_pb_message);
+
+  m->Run([this, handler, offset](const Status &status, const State &s) {
+    Status stat = status;
+    if (stat.ok()) {
+      const auto &resp = s.response;
+      if (resp.status() == ::hadoop::hdfs::Status::SUCCESS) {
+        if (resp.has_readopchecksuminfo()) {
+          const auto &checksum_info = resp.readopchecksuminfo();
+          chunk_padding_bytes_ = offset - checksum_info.chunkoffset();
+        }
+        state_ = kReadPacketHeader;
+      } else {
+        stat = Status::Error(s.response.message().c_str());
+      }
+    }
+    handler(stat);
+  });
+}
+
+template <class Stream>
+struct RemoteBlockReader<Stream>::ReadPacketHeader
+    : continuation::Continuation {
+  ReadPacketHeader(RemoteBlockReader<Stream> *parent) : parent_(parent) {}
+
+  virtual void Run(const Next &next) override {
+    parent_->packet_data_read_bytes_ = 0;
+    parent_->packet_len_ = 0;
+    auto handler = [next, this](const asio::error_code &ec, size_t) {
+      Status status;
+      if (ec) {
+        status = Status(ec.value(), ec.message().c_str());
+      } else {
+        parent_->packet_len_ = packet_length();
+        parent_->header_.Clear();
+        bool v = parent_->header_.ParseFromArray(&buf_[kHeaderStart],
+                                                 header_length());
+        assert(v && "Failed to parse the header");
+        parent_->state_ = kReadChecksum;
+      }
+      next(status);
+    };
+
+    asio::async_read(*parent_->stream_, asio::buffer(buf_),
+                     std::bind(&ReadPacketHeader::CompletionHandler, this,
+                               std::placeholders::_1, std::placeholders::_2),
+                     handler);
+  }
+
+private:
+  static const size_t kMaxHeaderSize = 512;
+  static const size_t kPayloadLenOffset = 0;
+  static const size_t kPayloadLenSize = sizeof(int);
+  static const size_t kHeaderLenOffset = 4;
+  static const size_t kHeaderLenSize = sizeof(short);
+  static const size_t kHeaderStart = kPayloadLenSize + kHeaderLenSize;
+
+  RemoteBlockReader<Stream> *parent_;
+  std::array<char, kMaxHeaderSize> buf_;
+
+  size_t packet_length() const {
+    return ntohl(*reinterpret_cast<const unsigned *>(&buf_[kPayloadLenOffset]));
+  }
+
+  size_t header_length() const {
+    return ntohs(*reinterpret_cast<const short *>(&buf_[kHeaderLenOffset]));
+  }
+
+  size_t CompletionHandler(const asio::error_code &ec, size_t transferred) {
+    if (ec) {
+      return 0;
+    } else if (transferred < kHeaderStart) {
+      return kHeaderStart - transferred;
+    } else {
+      return kHeaderStart + header_length() - transferred;
+    }
+  }
+};
+
+template <class Stream>
+struct RemoteBlockReader<Stream>::ReadChecksum : continuation::Continuation {
+  ReadChecksum(RemoteBlockReader<Stream> *parent) : parent_(parent) {}
+
+  virtual void Run(const Next &next) override {
+    auto parent = parent_;
+    if (parent->state_ != kReadChecksum) {
+      next(Status::OK());
+      return;
+    }
+
+    auto handler = [parent, next](const asio::error_code &ec, size_t) {
+      Status status;
+      if (ec) {
+        status = Status(ec.value(), ec.message().c_str());
+      } else {
+        parent->state_ =
+            parent->chunk_padding_bytes_ ? kReadPadding : kReadData;
+      }
+      next(status);
+    };
+    parent->checksum_.resize(parent->packet_len_ - sizeof(int) -
+                             parent->header_.datalen());
+    asio::async_read(*parent->stream_, asio::buffer(parent->checksum_),
+                     handler);
+  }
+
+private:
+  RemoteBlockReader<Stream> *parent_;
+};
+
+template <class Stream>
+struct RemoteBlockReader<Stream>::ReadPadding : continuation::Continuation {
+  ReadPadding(RemoteBlockReader<Stream> *parent)
+      : parent_(parent), padding_(parent->chunk_padding_bytes_),
+        bytes_transferred_(std::make_shared<size_t>(0)),
+        read_data_(new ReadData<asio::mutable_buffers_1>(
+            parent, bytes_transferred_, asio::buffer(padding_))) {}
+
+  virtual void Run(const Next &next) override {
+    if (parent_->state_ != kReadPadding || !parent_->chunk_padding_bytes_) {
+      next(Status::OK());
+      return;
+    }
+
+    auto h = [next, this](const Status &status) {
+      if (status.ok()) {
+        assert(reinterpret_cast<const int &>(*bytes_transferred_) ==
+               parent_->chunk_padding_bytes_);
+        parent_->chunk_padding_bytes_ = 0;
+        parent_->state_ = kReadData;
+      }
+      next(status);
+    };
+    read_data_->Run(h);
+  }
+
+private:
+  RemoteBlockReader<Stream> *parent_;
+  std::vector<char> padding_;
+  std::shared_ptr<size_t> bytes_transferred_;
+  std::shared_ptr<continuation::Continuation> read_data_;
+  ReadPadding(const ReadPadding &) = delete;
+  ReadPadding &operator=(const ReadPadding &) = delete;
+};
+
+template <class Stream>
+template <class MutableBufferSequence>
+struct RemoteBlockReader<Stream>::ReadData : continuation::Continuation {
+  ReadData(RemoteBlockReader<Stream> *parent,
+           std::shared_ptr<size_t> bytes_transferred,
+           const MutableBufferSequence &buf)
+      : parent_(parent), bytes_transferred_(bytes_transferred), buf_(buf) {}
+
+  virtual void Run(const Next &next) override {
+    auto handler =
+        [next, this](const asio::error_code &ec, size_t transferred) {
+          Status status;
+          if (ec) {
+            status = Status(ec.value(), ec.message().c_str());
+          }
+          *bytes_transferred_ += transferred;
+          parent_->bytes_to_read_ -= transferred;
+          parent_->packet_data_read_bytes_ += transferred;
+          if (parent_->packet_data_read_bytes_ >= parent_->header_.datalen()) {
+            parent_->state_ = kReadPacketHeader;
+          }
+          next(status);
+        };
+
+    auto data_len =
+        parent_->header_.datalen() - parent_->packet_data_read_bytes_;
+    async_read(*parent_->stream_, buf_, asio::transfer_exactly(data_len),
+               handler);
+  }
+
+private:
+  RemoteBlockReader<Stream> *parent_;
+  std::shared_ptr<size_t> bytes_transferred_;
+  MutableBufferSequence buf_;
+};
+
+template <class Stream>
+struct RemoteBlockReader<Stream>::AckRead : continuation::Continuation {
+  AckRead(RemoteBlockReader<Stream> *parent) : parent_(parent) {}
+
+  virtual void Run(const Next &next) override {
+    if (parent_->bytes_to_read_ > 0) {
+      next(Status::OK());
+      return;
+    }
+
+    auto m =
+        continuation::Pipeline<hadoop::hdfs::ClientReadStatusProto>::Create();
+    m->state().set_status(parent_->options_.verify_checksum
+                              ? hadoop::hdfs::Status::CHECKSUM_OK
+                              : hadoop::hdfs::Status::SUCCESS);
+
+    m->Push(
+        continuation::WriteDelimitedPBMessage(parent_->stream_, &m->state()));
+
+    m->Run([this, next](const Status &status,
+                        const hadoop::hdfs::ClientReadStatusProto &) {
+      if (status.ok()) {
+        parent_->state_ = RemoteBlockReader<Stream>::kFinished;
+      }
+      next(status);
+    });
+  }
+
+private:
+  RemoteBlockReader<Stream> *parent_;
+};
+
+template <class Stream>
+template <class MutableBufferSequence, class ReadHandler>
+void RemoteBlockReader<Stream>::async_read_some(
+    const MutableBufferSequence &buffers, const ReadHandler &handler) {
+  assert(state_ != kOpen && "Not connected");
+
+  struct State {
+    std::shared_ptr<size_t> bytes_transferred;
+  };
+  auto m = continuation::Pipeline<State>::Create();
+  m->state().bytes_transferred = std::make_shared<size_t>(0);
+
+  m->Push(new ReadPacketHeader(this))
+      .Push(new ReadChecksum(this))
+      .Push(new ReadPadding(this))
+      .Push(new ReadData<MutableBufferSequence>(
+          this, m->state().bytes_transferred, buffers))
+      .Push(new AckRead(this));
+
+  auto self = this->shared_from_this();
+  m->Run([self, handler](const Status &status, const State &state) {
+    handler(status, *state.bytes_transferred);
+  });
+}
+
+template <class Stream>
+template <class MutableBufferSequence>
+size_t
+RemoteBlockReader<Stream>::read_some(const MutableBufferSequence &buffers,
+                                     Status *status) {
+  size_t transferred = 0;
+  auto done = std::make_shared<std::promise<void>>();
+  auto future = done->get_future();
+  async_read_some(buffers,
+                  [status, &transferred, done](const Status &stat, size_t t) {
+                    *status = stat;
+                    transferred = t;
+                    done->set_value();
+                  });
+  future.wait();
+  return transferred;
+}
+
+template <class Stream>
+Status RemoteBlockReader<Stream>::connect(
+    const std::string &client_name, const hadoop::common::TokenProto *token,
+    const hadoop::hdfs::ExtendedBlockProto *block, uint64_t length,
+    uint64_t offset) {
+  auto stat = std::make_shared<std::promise<Status>>();
+  std::future<Status> future(stat->get_future());
+  async_connect(client_name, token, block, length, offset,
+                [stat](const Status &status) { stat->set_value(status); });
+  return future.get();
+}
+}
+
+#endif


Mime
View raw message