From: aengineer@apache.org
To: common-commits@hadoop.apache.org
Date: Thu, 22 Mar 2018 22:04:34 -0000
Subject: [10/50] [abbrv] hadoop git commit: HDFS-10754: libhdfs++: Create tools directory and implement hdfs_cat, hdfs_chgrp, hdfs_chown, hdfs_chmod and hdfs_find. Contributed by Anatoli Shein.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c
index fd82da3..ddba67f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/hdfs_shim.c
@@ -492,6 +492,10 @@ int hdfsFreeBlockLocations(struct hdfsBlockLocations * locations) {
   return libhdfspp_hdfsFreeBlockLocations(locations);
 }
 
+hdfsFileInfo *hdfsFind(hdfsFS fs, const char* path, const char* name, uint32_t *numEntries) {
+  return (hdfsFileInfo *)libhdfspp_hdfsFind(fs->libhdfsppRep, path, name, numEntries);
+}
+
 int hdfsCreateSnapshot(hdfsFS fs, const char* path, const char* name) {
   return libhdfspp_hdfsCreateSnapshot(fs->libhdfsppRep, path, name);
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h
index 481ed68..644ff13 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_wrapper_undefs.h
@@ -94,6 +94,7 @@
 #undef hdfsCancel
 #undef hdfsGetBlockLocations
 #undef
hdfsFreeBlockLocations +#undef hdfsFind #undef hdfsCreateSnapshot #undef hdfsDeleteSnapshot #undef hdfsAllowSnapshot http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h index a1e4483..c186d63 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfspp_wrapper_defines.h @@ -94,6 +94,7 @@ #define hdfsCancel libhdfspp_hdfsCancel #define hdfsGetBlockLocations libhdfspp_hdfsGetBlockLocations #define hdfsFreeBlockLocations libhdfspp_hdfsFreeBlockLocations +#define hdfsFind libhdfspp_hdfsFind #define hdfsCreateSnapshot libhdfspp_hdfsCreateSnapshot #define hdfsDeleteSnapshot libhdfspp_hdfsDeleteSnapshot #define hdfsAllowSnapshot libhdfspp_hdfsAllowSnapshot http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt new file mode 100644 index 0000000..f0817eb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default LIBHDFSPP_DIR to the default install location. You can override +# it by add -DLIBHDFSPP_DIR=... 
to your cmake invocation +set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX}) + +include_directories( ${LIBHDFSPP_DIR}/include ) +link_directories( ${LIBHDFSPP_DIR}/lib ) + +add_library(tools_common_obj OBJECT tools_common.cpp) +add_library(tools_common $) + +add_executable(hdfs_cat hdfs_cat.cpp) +target_link_libraries(hdfs_cat tools_common hdfspp) + +add_executable(hdfs_chgrp hdfs_chgrp.cpp) +target_link_libraries(hdfs_chgrp tools_common hdfspp) + +add_executable(hdfs_chown hdfs_chown.cpp) +target_link_libraries(hdfs_chown tools_common hdfspp) + +add_executable(hdfs_chmod hdfs_chmod.cpp) +target_link_libraries(hdfs_chmod tools_common hdfspp) + +add_executable(hdfs_find hdfs_find.cpp) +target_link_libraries(hdfs_find tools_common hdfspp) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cpp ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cpp b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cpp new file mode 100644 index 0000000..166a7bf --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cpp @@ -0,0 +1,120 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +#include +#include +#include "tools_common.h" + +void usage(){ + std::cout << "Usage: hdfs_cat [OPTION] FILE" + << std::endl + << std::endl << "Concatenate FILE to standard output." + << std::endl + << std::endl << " -h display this help and exit" + << std::endl + << std::endl << "Examples:" + << std::endl << "hdfs_cat hdfs://localhost.localdomain:9433/dir/file" + << std::endl << "hdfs_cat /dir/file" + << std::endl; +} + +#define BUF_SIZE 4096 + +int main(int argc, char *argv[]) { + if (argc != 2) { + usage(); + exit(EXIT_FAILURE); + } + + int input; + + //Using GetOpt to read in the values + opterr = 0; + while ((input = getopt(argc, argv, "h")) != -1) { + switch (input) + { + case 'h': + usage(); + exit(EXIT_SUCCESS); + break; + case '?': + if (isprint(optopt)) + std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl; + else + std::cerr << "Unknown option character `" << (char) optopt << "'." 
<< std::endl; + usage(); + exit(EXIT_FAILURE); + default: + exit(EXIT_FAILURE); + } + } + + std::string uri_path = argv[optind]; + + //Building a URI object from the given uri_path + hdfs::optional uri = hdfs::URI::parse_from_string(uri_path); + if (!uri) { + std::cerr << "Malformed URI: " << uri_path << std::endl; + exit(EXIT_FAILURE); + } + + //TODO: HDFS-9539 Currently options can be returned empty + hdfs::Options options = *hdfs::getOptions(); + + std::shared_ptr fs = hdfs::doConnect(uri.value(), options); + if (!fs) { + std::cerr << "Could not connect the file system. " << std::endl; + exit(EXIT_FAILURE); + } + + hdfs::FileHandle *file_raw = nullptr; + hdfs::Status status = fs->Open(uri->get_path(), &file_raw); + if (!status.ok()) { + std::cerr << "Could not open file " << uri->get_path() << ". " << status.ToString() << std::endl; + exit(EXIT_FAILURE); + } + //wrapping file_raw into a unique pointer to guarantee deletion + std::unique_ptr file(file_raw); + + char input_buffer[BUF_SIZE]; + ssize_t total_bytes_read = 0; + size_t last_bytes_read = 0; + + do{ + //Reading file chunks + status = file->Read(input_buffer, sizeof(input_buffer), &last_bytes_read); + if(status.ok()) { + //Writing file chunks to stdout + fwrite(input_buffer, last_bytes_read, 1, stdout); + total_bytes_read += last_bytes_read; + } else { + if(status.is_invalid_offset()){ + //Reached the end of the file + break; + } else { + std::cerr << "Error reading the file: " << status.ToString() << std::endl; + exit(EXIT_FAILURE); + } + } + } while (last_bytes_read > 0); + + // Clean up static data and prevent valgrind memory leaks + google::protobuf::ShutdownProtobufLibrary(); + return 0; +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chgrp.cpp ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chgrp.cpp b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chgrp.cpp new file mode 100644 index 0000000..2bb6843 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chgrp.cpp @@ -0,0 +1,196 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +#include +#include +#include +#include "tools_common.h" + +void usage(){ + std::cout << "Usage: hdfs_chgrp [OPTION] GROUP FILE" + << std::endl + << std::endl << "Change the group association of each FILE to GROUP." + << std::endl << "The user must be the owner of files. 
Additional information is in the Permissions Guide:" + << std::endl << "https://hadoop.apache.org/docs/r2.7.1/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html" + << std::endl + << std::endl << " -R operate on files and directories recursively" + << std::endl << " -h display this help and exit" + << std::endl + << std::endl << "Examples:" + << std::endl << "hdfs_chgrp -R new_group hdfs://localhost.localdomain:9433/dir/file" + << std::endl << "hdfs_chgrp new_group /dir/file" + << std::endl; +} + +struct SetOwnerState { + const std::string username; + const std::string groupname; + const std::function handler; + //The request counter is incremented once every time SetOwner async call is made + uint64_t request_counter; + //This boolean will be set when find returns the last result + bool find_is_done; + //Final status to be returned + hdfs::Status status; + //Shared variables will need protection with a lock + std::mutex lock; + SetOwnerState(const std::string & username_, const std::string & groupname_, + const std::function & handler_, + uint64_t request_counter_, bool find_is_done_) + : username(username_), + groupname(groupname_), + handler(handler_), + request_counter(request_counter_), + find_is_done(find_is_done_), + status(), + lock() { + } +}; + +int main(int argc, char *argv[]) { + //We should have 3 or 4 parameters + if (argc != 3 && argc != 4) { + usage(); + exit(EXIT_FAILURE); + } + + bool recursive = false; + int input; + + //Using GetOpt to read in the values + opterr = 0; + while ((input = getopt(argc, argv, "Rh")) != -1) { + switch (input) + { + case 'R': + recursive = 1; + break; + case 'h': + usage(); + exit(EXIT_SUCCESS); + break; + case '?': + if (isprint(optopt)) + std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl; + else + std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl; + usage(); + exit(EXIT_FAILURE); + default: + exit(EXIT_FAILURE); + } + } + std::string group = argv[optind]; + //Owner stays the same, just group association changes. + std::string owner = ""; + std::string uri_path = argv[optind + 1]; + + //Building a URI object from the given uri_path + hdfs::optional uri = hdfs::URI::parse_from_string(uri_path); + if (!uri) { + std::cerr << "Malformed URI: " << uri_path << std::endl; + exit(EXIT_FAILURE); + } + + //TODO: HDFS-9539 Currently options can be returned empty + hdfs::Options options = *hdfs::getOptions(); + + //TODO: HDFS-9539 - until then we increase the time-out to allow all recursive async calls to finish + options.rpc_timeout = std::numeric_limits::max(); + + std::shared_ptr fs = hdfs::doConnect(uri.value(), options); + if (!fs) { + std::cerr << "Could not connect the file system. " << std::endl; + exit(EXIT_FAILURE); + } + + /* wrap async FileSystem::SetOwner with promise to make it a blocking call */ + std::shared_ptr> promise = std::make_shared>(); + std::future future(promise->get_future()); + auto handler = [promise](const hdfs::Status &s) { + promise->set_value(s); + }; + + if(!recursive){ + fs->SetOwner(uri->get_path(), owner, group, handler); + } + else { + //Allocating shared state, which includes: + //username and groupname to be set, handler to be called, request counter, and a boolean to keep track if find is done + std::shared_ptr state = std::make_shared(owner, group, handler, 0, false); + + // Keep requesting more from Find until we process the entire listing. Call handler when Find is done and reques counter is 0. 
+ // Find guarantees that the handler will only be called once at a time so we do not need locking in handlerFind. + auto handlerFind = [fs, state](const hdfs::Status &status_find, const std::vector & stat_infos, bool has_more_results) -> bool { + + //For each result returned by Find we call async SetOwner with the handler below. + //SetOwner DOES NOT guarantee that the handler will only be called once at a time, so we DO need locking in handlerSetOwner. + auto handlerSetOwner = [state](const hdfs::Status &status_set_owner) { + std::lock_guard guard(state->lock); + + //Decrement the counter once since we are done with this async call + if (!status_set_owner.ok() && state->status.ok()){ + //We make sure we set state->status only on the first error. + state->status = status_set_owner; + } + state->request_counter--; + if(state->request_counter == 0 && state->find_is_done){ + state->handler(state->status); //exit + } + }; + if(!stat_infos.empty() && state->status.ok()) { + for (hdfs::StatInfo const& s : stat_infos) { + //Launch an asynchronous call to SetOwner for every returned result + state->request_counter++; + fs->SetOwner(s.full_path, state->username, state->groupname, handlerSetOwner); + } + } + + //Lock this section because handlerSetOwner might be accessing the same + //shared variables simultaneously + std::lock_guard guard(state->lock); + if (!status_find.ok() && state->status.ok()){ + //We make sure we set state->status only on the first error. + state->status = status_find; + } + if(!has_more_results){ + state->find_is_done = true; + if(state->request_counter == 0){ + state->handler(state->status); //exit + } + return false; + } + return true; + }; + + //Asynchronous call to Find + fs->Find(uri->get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(), handlerFind); + } + + /* block until promise is set */ + hdfs::Status status = future.get(); + if (!status.ok()) { + std::cerr << "Error: " << status.ToString() << std::endl; + exit(EXIT_FAILURE); + } + + // Clean up static data and prevent valgrind memory leaks + google::protobuf::ShutdownProtobufLibrary(); + return 0; +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chmod.cpp ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chmod.cpp b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chmod.cpp new file mode 100644 index 0000000..0a001d6 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chmod.cpp @@ -0,0 +1,194 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+*/ + +#include +#include +#include +#include "tools_common.h" + +void usage(){ + std::cout << "Usage: hdfs_chmod [OPTION] FILE" + << std::endl + << std::endl << "Change the permissions of each FILE to MODE." + << std::endl << "The user must be the owner of the file, or else a super-user." + << std::endl << "Additional information is in the Permissions Guide:" + << std::endl << "https://hadoop.apache.org/docs/r2.7.1/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html" + << std::endl + << std::endl << " -R operate on files and directories recursively" + << std::endl << " -h display this help and exit" + << std::endl + << std::endl << "Examples:" + << std::endl << "hdfs_chmod -R 755 hdfs://localhost.localdomain:9433/dir/file" + << std::endl << "hdfs_chmod 777 /dir/file" + << std::endl; +} + +struct SetPermissionState { + const uint16_t permissions; + const std::function handler; + //The request counter is incremented once every time SetOwner async call is made + uint64_t request_counter; + //This boolean will be set when find returns the last result + bool find_is_done; + //Final status to be returned + hdfs::Status status; + //Shared variables will need protection with a lock + std::mutex lock; + SetPermissionState(const uint16_t permissions_, const std::function & handler_, + uint64_t request_counter_, bool find_is_done_) + : permissions(permissions_), + handler(handler_), + request_counter(request_counter_), + find_is_done(find_is_done_), + status(), + lock() { + } +}; + +int main(int argc, char *argv[]) { + //We should have 3 or 4 parameters + if (argc != 3 && argc != 4) { + usage(); + exit(EXIT_FAILURE); + } + + bool recursive = false; + int input; + + //Using GetOpt to read in the values + opterr = 0; + while ((input = getopt(argc, argv, "Rh")) != -1) { + switch (input) + { + case 'R': + recursive = 1; + break; + case 'h': + usage(); + exit(EXIT_SUCCESS); + break; + case '?': + if (isprint(optopt)) + std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl; + else + std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl; + usage(); + exit(EXIT_FAILURE); + default: + exit(EXIT_FAILURE); + } + } + std::string permissions = argv[optind]; + std::string uri_path = argv[optind + 1]; + + //Building a URI object from the given uri_path + hdfs::optional uri = hdfs::URI::parse_from_string(uri_path); + if (!uri) { + std::cerr << "Malformed URI: " << uri_path << std::endl; + exit(EXIT_FAILURE); + } + + //TODO: HDFS-9539 Currently options can be returned empty + hdfs::Options options = *hdfs::getOptions(); + + //TODO: HDFS-9539 - until then we increase the time-out to allow all recursive async calls to finish + options.rpc_timeout = std::numeric_limits::max(); + + std::shared_ptr fs = hdfs::doConnect(uri.value(), options); + if (!fs) { + std::cerr << "Could not connect the file system. " << std::endl; + exit(EXIT_FAILURE); + } + + /* wrap async FileSystem::SetPermission with promise to make it a blocking call */ + std::shared_ptr> promise = std::make_shared>(); + std::future future(promise->get_future()); + auto handler = [promise](const hdfs::Status &s) { + promise->set_value(s); + }; + + //strtol() is reading the value with base 8, NULL because we are reading in just one value. 
+ uint16_t perm = strtol(permissions.c_str(), NULL, 8); + if(!recursive){ + fs->SetPermission(uri->get_path(), perm, handler); + } + else { + //Allocating shared state, which includes: + //username and groupname to be set, handler to be called, request counter, and a boolean to keep track if find is done + std::shared_ptr state = std::make_shared(perm, handler, 0, false); + + // Keep requesting more from Find until we process the entire listing. Call handler when Find is done and reques counter is 0. + // Find guarantees that the handler will only be called once at a time so we do not need locking in handlerFind. + auto handlerFind = [fs, state](const hdfs::Status &status_find, const std::vector & stat_infos, bool has_more_results) -> bool { + + //For each result returned by Find we call async SetOwner with the handler below. + //SetOwner DOES NOT guarantee that the handler will only be called once at a time, so we DO need locking in handlerSetOwner. + auto handlerSetOwner = [state](const hdfs::Status &status_set_owner) { + std::lock_guard guard(state->lock); + + //Decrement the counter once since we are done with this async call + if (!status_set_owner.ok() && state->status.ok()){ + //We make sure we set state->status only on the first error. + state->status = status_set_owner; + } + state->request_counter--; + if(state->request_counter == 0 && state->find_is_done){ + state->handler(state->status); //exit + } + }; + if(!stat_infos.empty() && state->status.ok()) { + for (hdfs::StatInfo const& s : stat_infos) { + //Launch an asynchronous call to SetOwner for every returned result + state->request_counter++; + fs->SetPermission(s.full_path, state->permissions, handlerSetOwner); + } + } + + //Lock this section because handlerSetOwner might be accessing the same + //shared variables simultaneously + std::lock_guard guard(state->lock); + if (!status_find.ok() && state->status.ok()){ + //We make sure we set state->status only on the first error. + state->status = status_find; + } + if(!has_more_results){ + state->find_is_done = true; + if(state->request_counter == 0){ + state->handler(state->status); //exit + } + return false; + } + return true; + }; + + //Asynchronous call to Find + fs->Find(uri->get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(), handlerFind); + } + + /* block until promise is set */ + hdfs::Status status = future.get(); + if (!status.ok()) { + std::cerr << "Error: " << status.ToString() << std::endl; + exit(EXIT_FAILURE); + } + + // Clean up static data and prevent valgrind memory leaks + google::protobuf::ShutdownProtobufLibrary(); + return 0; +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chown.cpp ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chown.cpp b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chown.cpp new file mode 100644 index 0000000..08724c6 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_chown.cpp @@ -0,0 +1,206 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +#include +#include +#include +#include "tools_common.h" + +void usage(){ + std::cout << "Usage: hdfs_chown [OPTION] [OWNER][:[GROUP]] FILE" + << std::endl + << std::endl << "Change the owner and/or group of each FILE to OWNER and/or GROUP." + << std::endl << "The user must be a super-user. Additional information is in the Permissions Guide:" + << std::endl << "https://hadoop.apache.org/docs/r2.7.1/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html" + << std::endl + << std::endl << " -R operate on files and directories recursively" + << std::endl << " -h display this help and exit" + << std::endl + << std::endl << "Owner is unchanged if missing. Group is unchanged if missing." + << std::endl << "OWNER and GROUP may be numeric as well as symbolic." + << std::endl + << std::endl << "Examples:" + << std::endl << "hdfs_chown -R new_owner:new_group hdfs://localhost.localdomain:9433/dir/file" + << std::endl << "hdfs_chown new_owner /dir/file" + << std::endl; +} + +struct SetOwnerState { + const std::string username; + const std::string groupname; + const std::function handler; + //The request counter is incremented once every time SetOwner async call is made + uint64_t request_counter; + //This boolean will be set when find returns the last result + bool find_is_done; + //Final status to be returned + hdfs::Status status; + //Shared variables will need protection with a lock + std::mutex lock; + SetOwnerState(const std::string & username_, const std::string & groupname_, + const std::function & handler_, + uint64_t request_counter_, bool find_is_done_) + : username(username_), + groupname(groupname_), + handler(handler_), + request_counter(request_counter_), + find_is_done(find_is_done_), + status(), + lock() { + } +}; + +int main(int argc, char *argv[]) { + //We should have 3 or 4 parameters + if (argc != 3 && argc != 4) { + usage(); + exit(EXIT_FAILURE); + } + + bool recursive = false; + int input; + + //Using GetOpt to read in the values + opterr = 0; + while ((input = getopt(argc, argv, "Rh")) != -1) { + switch (input) + { + case 'R': + recursive = 1; + break; + case 'h': + usage(); + exit(EXIT_SUCCESS); + break; + case '?': + if (isprint(optopt)) + std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl; + else + std::cerr << "Unknown option character `" << (char) optopt << "'." 
<< std::endl; + usage(); + exit(EXIT_FAILURE); + default: + exit(EXIT_FAILURE); + } + } + std::string owner_and_group = argv[optind]; + std::string uri_path = argv[optind + 1]; + + std::string owner, group; + size_t owner_end = owner_and_group.find(":"); + if(owner_end == std::string::npos) { + owner = owner_and_group; + } else { + owner = owner_and_group.substr(0, owner_end); + group = owner_and_group.substr(owner_end + 1); + } + + //Building a URI object from the given uri_path + hdfs::optional uri = hdfs::URI::parse_from_string(uri_path); + if (!uri) { + std::cerr << "Malformed URI: " << uri_path << std::endl; + exit(EXIT_FAILURE); + } + + //TODO: HDFS-9539 Currently options can be returned empty + hdfs::Options options = *hdfs::getOptions(); + + //TODO: HDFS-9539 - until then we increase the time-out to allow all recursive async calls to finish + options.rpc_timeout = std::numeric_limits::max(); + + std::shared_ptr fs = hdfs::doConnect(uri.value(), options); + if (!fs) { + std::cerr << "Could not connect the file system. " << std::endl; + exit(EXIT_FAILURE); + } + + /* wrap async FileSystem::SetOwner with promise to make it a blocking call */ + std::shared_ptr> promise = std::make_shared>(); + std::future future(promise->get_future()); + auto handler = [promise](const hdfs::Status &s) { + promise->set_value(s); + }; + + if(!recursive){ + fs->SetOwner(uri->get_path(), owner, group, handler); + } + else { + //Allocating shared state, which includes: + //username and groupname to be set, handler to be called, request counter, and a boolean to keep track if find is done + std::shared_ptr state = std::make_shared(owner, group, handler, 0, false); + + // Keep requesting more from Find until we process the entire listing. Call handler when Find is done and reques counter is 0. + // Find guarantees that the handler will only be called once at a time so we do not need locking in handlerFind. + auto handlerFind = [fs, state](const hdfs::Status &status_find, const std::vector & stat_infos, bool has_more_results) -> bool { + + //For each result returned by Find we call async SetOwner with the handler below. + //SetOwner DOES NOT guarantee that the handler will only be called once at a time, so we DO need locking in handlerSetOwner. + auto handlerSetOwner = [state](const hdfs::Status &status_set_owner) { + std::lock_guard guard(state->lock); + + //Decrement the counter once since we are done with this async call + if (!status_set_owner.ok() && state->status.ok()){ + //We make sure we set state->status only on the first error. + state->status = status_set_owner; + } + state->request_counter--; + if(state->request_counter == 0 && state->find_is_done){ + state->handler(state->status); //exit + } + }; + if(!stat_infos.empty() && state->status.ok()) { + for (hdfs::StatInfo const& s : stat_infos) { + //Launch an asynchronous call to SetOwner for every returned result + state->request_counter++; + fs->SetOwner(s.full_path, state->username, state->groupname, handlerSetOwner); + } + } + + //Lock this section because handlerSetOwner might be accessing the same + //shared variables simultaneously + std::lock_guard guard(state->lock); + if (!status_find.ok() && state->status.ok()){ + //We make sure we set state->status only on the first error. 
+ state->status = status_find; + } + if(!has_more_results){ + state->find_is_done = true; + if(state->request_counter == 0){ + state->handler(state->status); //exit + } + return false; + } + return true; + }; + + //Asynchronous call to Find + fs->Find(uri->get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(), handlerFind); + } + + /* block until promise is set */ + hdfs::Status status = future.get(); + if (!status.ok()) { + std::cerr << "Error: " << status.ToString() << std::endl; + exit(EXIT_FAILURE); + } + + // Clean up static data and prevent valgrind memory leaks + google::protobuf::ShutdownProtobufLibrary(); + return 0; +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cpp ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cpp b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cpp new file mode 100644 index 0000000..eca79c6 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cpp @@ -0,0 +1,156 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +#include +#include +#include +#include "tools_common.h" + +void usage(){ + std::cout << "Usage: hdfs_find [OPTION] PATH" + << std::endl + << std::endl << "Finds all files recursively starting from the" + << std::endl << "specified PATH and prints their file paths." + << std::endl << "This hdfs_find tool mimics the POSIX find." + << std::endl + << std::endl << "Both PATH and NAME can have wild-cards." 
+ << std::endl + << std::endl << " -n NAME if provided all results will be matching the NAME pattern" + << std::endl << " otherwise, the implicit '*' will be used" + << std::endl << " NAME allows wild-cards" + << std::endl + << std::endl << " -m MAX_DEPTH if provided the maximum depth to recurse after the end of" + << std::endl << " the path is reached will be limited by MAX_DEPTH" + << std::endl << " otherwise, the maximum depth to recurse is unbound" + << std::endl << " MAX_DEPTH can be set to 0 for pure globbing and ignoring" + << std::endl << " the NAME option (no recursion after the end of the path)" + << std::endl + << std::endl << " -h display this help and exit" + << std::endl + << std::endl << "Examples:" + << std::endl << "hdfs_find hdfs://localhost.localdomain:9433/dir?/tree* -n some?file*name" + << std::endl << "hdfs_find / -n file_name -m 3" + << std::endl; +} + +int main(int argc, char *argv[]) { + //We should have at least 2 arguments + if (argc < 2) { + usage(); + exit(EXIT_FAILURE); + } + + int input; + //If NAME is not specified we use implicit "*" + std::string name = "*"; + //If MAX_DEPTH is not specified we use the max value of uint_32_t + uint32_t max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth(); + + //Using GetOpt to read in the values + opterr = 0; + while ((input = getopt(argc, argv, "hn:m:")) != -1) { + switch (input) + { + case 'h': + usage(); + exit(EXIT_SUCCESS); + break; + case 'n': + name = optarg; + break; + case 'm': + max_depth = std::stoi(optarg); + break; + case '?': + if (optopt == 'n' || optopt == 'm') + std::cerr << "Option -" << (char) optopt << " requires an argument." << std::endl; + else if (isprint(optopt)) + std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl; + else + std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl; + usage(); + exit(EXIT_FAILURE); + default: + exit(EXIT_FAILURE); + } + } + std::string uri_path = argv[optind]; + + //Building a URI object from the given uri_path + hdfs::optional uri = hdfs::URI::parse_from_string(uri_path); + if (!uri) { + std::cerr << "Malformed URI: " << uri_path << std::endl; + exit(EXIT_FAILURE); + } + + //TODO: HDFS-9539 Currently options can be returned empty + hdfs::Options options = *hdfs::getOptions(); + + //TODO: HDFS-9539 - until then we increase the time-out to allow all recursive async calls to finish + options.rpc_timeout = std::numeric_limits::max(); + + std::shared_ptr fs = hdfs::doConnect(uri.value(), options); + if (!fs) { + std::cerr << "Could not connect the file system. " << std::endl; + exit(EXIT_FAILURE); + } + + std::promise promise; + std::future future(promise.get_future()); + hdfs::Status status = hdfs::Status::OK(); + + /** + * Keep requesting more until we get the entire listing. Set the promise + * when we have the entire listing to stop. + * + * Find guarantees that the handler will only be called once at a time, + * so we do not need any locking here + */ + auto handler = [&promise, &status] + (const hdfs::Status &s, const std::vector & si, bool has_more_results) -> bool { + //Print result chunks as they arrive + if(!si.empty()) { + for (hdfs::StatInfo const& s : si) { + std::cout << s.full_path << std::endl; + } + } + if(!s.ok() && status.ok()){ + //We make sure we set 'status' only on the first error. 
+ status = s; + } + if (!has_more_results) { + promise.set_value(); //set promise + return false; //request stop sending results + } + return true; //request more results + }; + + //Asynchronous call to Find + fs->Find(uri->get_path(), name, max_depth, handler); + + //block until promise is set + future.get(); + if(!status.ok()) { + std::cerr << "Error: " << status.ToString() << std::endl; + } + + // Clean up static data and prevent valgrind memory leaks + google::protobuf::ShutdownProtobufLibrary(); + return 0; +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.cpp ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.cpp b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.cpp new file mode 100644 index 0000000..af882ce --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.cpp @@ -0,0 +1,70 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +#include "tools_common.h" + +namespace hdfs { + + std::shared_ptr getOptions() { + std::shared_ptr options = std::make_shared(); + //Setting the config path to the default: "$HADOOP_CONF_DIR" or "/etc/hadoop/conf" + hdfs::ConfigurationLoader loader; + //Loading default config files core-site.xml and hdfs-site.xml from the config path + hdfs::optional config = loader.LoadDefaultResources(); + //TODO: HDFS-9539 - after this is resolved, valid config will always be returned. + if(config){ + //Loading options from the config + *options = config->GetOptions(); + } + return options; + } + + std::shared_ptr doConnect(hdfs::URI & uri, hdfs::Options & options) { + IoService * io_service = IoService::New(); + //Wrapping fs into a shared pointer to guarantee deletion + std::shared_ptr fs(hdfs::FileSystem::New(io_service, "", options)); + if (!fs) { + std::cerr << "Could not create FileSystem object. " << std::endl; + exit(EXIT_FAILURE); + } + Status status; + //Check if the user supplied the host + if(!uri.get_host().empty()){ + //If port is supplied we use it, otherwise we use the empty string so that it will be looked up in configs. + std::string port = (uri.get_port()) ? std::to_string(uri.get_port().value()) : ""; + status = fs->Connect(uri.get_host(), port); + if (!status.ok()) { + std::cerr << "Could not connect to " << uri.get_host() << ":" << port << ". " << status.ToString() << std::endl; + exit(EXIT_FAILURE); + } + } else { + status = fs->ConnectToDefaultFs(); + if (!status.ok()) { + if(!options.defaultFS.get_host().empty()){ + std::cerr << "Error connecting to " << options.defaultFS << ". 
" << status.ToString() << std::endl; + } else { + std::cerr << "Error connecting to the cluster: defaultFS is empty. " << status.ToString() << std::endl; + } + exit(EXIT_FAILURE); + } + } + return fs; + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/4f6cb5d1/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.h ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.h new file mode 100644 index 0000000..858fc4b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/tools_common.h @@ -0,0 +1,39 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +#ifndef TOOLS_COMMON_H_ +#define TOOLS_COMMON_H_ + +#include "hdfspp/hdfspp.h" +#include "common/hdfs_configuration.h" +#include "common/configuration_loader.h" + +#include + +namespace hdfs { + + //Pull configurations and get the Options object + std::shared_ptr getOptions(); + + //Build all necessary objects and perform the connection + std::shared_ptr doConnect(hdfs::URI & uri, hdfs::Options & options); + +} + +#endif --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org