Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 46B7D2004F1 for ; Wed, 30 Aug 2017 22:04:12 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 457A4169E46; Wed, 30 Aug 2017 20:04:12 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 4BE60169E4C for ; Wed, 30 Aug 2017 22:04:11 +0200 (CEST) Received: (qmail 59535 invoked by uid 500); 30 Aug 2017 20:04:10 -0000 Mailing-List: contact dev-help@quickstep.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@quickstep.incubator.apache.org Delivered-To: mailing list dev@quickstep.incubator.apache.org Received: (qmail 59170 invoked by uid 99); 30 Aug 2017 20:04:08 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 30 Aug 2017 20:04:08 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id B313DCDFA3 for ; Wed, 30 Aug 2017 20:04:07 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -4.021 X-Spam-Level: X-Spam-Status: No, score=-4.021 tagged_above=-999 required=6.31 tests=[KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-0.001] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id CS2KGLmuTB2N for ; Wed, 30 Aug 2017 20:04:07 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-lw-us.apache.org 
(ASF Mail Server at mx1-lw-us.apache.org) with SMTP id E873861263 for ; Wed, 30 Aug 2017 20:04:05 +0000 (UTC) Received: (qmail 58376 invoked by uid 99); 30 Aug 2017 20:04:05 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 30 Aug 2017 20:04:05 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0889DF55A0; Wed, 30 Aug 2017 20:04:01 +0000 (UTC) From: zuyu To: dev@quickstep.incubator.apache.org Reply-To: dev@quickstep.incubator.apache.org References: In-Reply-To: Subject: [GitHub] incubator-quickstep pull request #291: Add "COPY TO" operator for exporting ... Content-Type: text/plain Message-Id: <20170830200402.0889DF55A0@git1-us-west.apache.org> Date: Wed, 30 Aug 2017 20:04:01 +0000 (UTC) archived-at: Wed, 30 Aug 2017 20:04:12 -0000 Github user zuyu commented on a diff in the pull request: https://github.com/apache/incubator-quickstep/pull/291#discussion_r136162772 --- Diff: query_optimizer/resolver/Resolver.cpp --- @@ -418,27 +455,157 @@ L::LogicalPtr Resolver::resolve(const ParseStatement &parse_query) { } L::LogicalPtr Resolver::resolveCopyFrom( - const ParseStatementCopyFrom &copy_from_statement) { - // Default parameters. 
- std::string column_delimiter_ = "\t"; - bool escape_strings_ = true; + const ParseStatementCopy &copy_from_statement) { + DCHECK(copy_from_statement.getCopyDirection() == ParseStatementCopy::kFrom); + const PtrList *params = copy_from_statement.params(); - const ParseCopyFromParams *params = copy_from_statement.params(); + BulkIOFormat file_format = BulkIOFormat::kText; if (params != nullptr) { - if (params->delimiter != nullptr) { - column_delimiter_ = params->delimiter->value(); - if (column_delimiter_.size() != 1) { - THROW_SQL_ERROR_AT(params->delimiter) - << "DELIMITER is not a single character"; + for (const ParseKeyValue &param : *params) { + const std::string &key = ToLower(param.key()->value()); + if (key == "format") { + const ParseString *parse_format = GetKeyValueString(param); + const std::string format = ToLower(parse_format->value()); + // TODO(jianqiao): Support other bulk load formats such as CSV. + if (format != "text") { + THROW_SQL_ERROR_AT(parse_format) << "Unsupported file format: " << format; + } + // Update file_format when other formats get supported. 
+ break; + } + } + } + + std::unique_ptr options = + std::make_unique(file_format); + if (params != nullptr) { + for (const ParseKeyValue &param : *params) { + const std::string &key = ToLower(param.key()->value()); + if (key == "delimiter") { + const ParseString *parse_delimiter = GetKeyValueString(param); + const std::string &delimiter = parse_delimiter->value(); + if (delimiter.size() != 1) { + THROW_SQL_ERROR_AT(parse_delimiter) + << "DELIMITER is not a single character"; + } + options->setDelimiter(delimiter.front()); + } else if (key == "escape_strings") { + options->setEscapeStrings(GetKeyValueBool(param)); + } else if (key != "format") { + THROW_SQL_ERROR_AT(&param) << "Unsupported copy option: " << key; } } - escape_strings_ = params->escape_strings; } return L::CopyFrom::Create(resolveRelationName(copy_from_statement.relation_name()), - copy_from_statement.source_filename()->value(), - column_delimiter_[0], - escape_strings_); + copy_from_statement.file_name()->value(), + BulkIOConfigurationPtr(options.release())); +} + +L::LogicalPtr Resolver::resolveCopyTo( + const ParseStatementCopy &copy_to_statement) { + DCHECK(copy_to_statement.getCopyDirection() == ParseStatementCopy::kTo); + const PtrList *params = copy_to_statement.params(); + + // Check if copy format is explicitly specified. 
+ BulkIOFormat file_format = BulkIOFormat::kText; + bool format_specified = false; + if (params != nullptr) { + for (const ParseKeyValue &param : *params) { + const std::string &key = ToLower(param.key()->value()); + if (key == "format") { + const ParseString *parse_format = GetKeyValueString(param); + const std::string format = ToLower(parse_format->value()); + if (format == "csv") { + file_format = BulkIOFormat::kCSV; + } else if (format == "text") { + file_format = BulkIOFormat::kText; + } else { + THROW_SQL_ERROR_AT(parse_format) << "Unsupported file format: " << format; + } + format_specified = true; + break; + } + } + } + + const std::string &file_name = copy_to_statement.file_name()->value(); + if (file_name.length() <= 1) { + THROW_SQL_ERROR_AT(copy_to_statement.file_name()) + << "File name can not be empty"; + } + + // Infer copy format from file name extension. + if (!format_specified) { + if (file_name.length() > 4) { + if (ToLower(file_name.substr(file_name.length() - 4)) == ".csv") { + file_format = BulkIOFormat::kCSV; + } + } + } + + // Resolve the copy options. + std::unique_ptr options = + std::make_unique(file_format); + if (params != nullptr) { + for (const ParseKeyValue &param : *params) { + const std::string &key = ToLower(param.key()->value()); + if (key == "delimiter") { + const ParseString *parse_delimiter = GetKeyValueString(param); + const std::string &delimiter = parse_delimiter->value(); + if (delimiter.size() != 1) { --- End diff -- Should we change this to `1u` to avoid a signed-vs-unsigned comparison warning? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. ---