Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 24F5C200D4E for ; Thu, 7 Dec 2017 21:16:58 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 23746160C0C; Thu, 7 Dec 2017 20:16:58 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 41A57160C08 for ; Thu, 7 Dec 2017 21:16:57 +0100 (CET) Received: (qmail 20348 invoked by uid 500); 7 Dec 2017 20:16:56 -0000 Mailing-List: contact dev-help@orc.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@orc.apache.org Delivered-To: mailing list dev@orc.apache.org Received: (qmail 20335 invoked by uid 99); 7 Dec 2017 20:16:56 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 07 Dec 2017 20:16:56 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 1C2C5E0433; Thu, 7 Dec 2017 20:16:56 +0000 (UTC) From: moresandeep To: dev@orc.apache.org Reply-To: dev@orc.apache.org References: In-Reply-To: Subject: [GitHub] orc pull request #184: Orc 256 unmask range option Content-Type: text/plain Message-Id: <20171207201656.1C2C5E0433@git1-us-west.apache.org> Date: Thu, 7 Dec 2017 20:16:56 +0000 (UTC) archived-at: Thu, 07 Dec 2017 20:16:58 -0000 Github user moresandeep commented on a diff in the pull request: https://github.com/apache/orc/pull/184#discussion_r155629181 --- Diff: java/core/src/test/org/apache/orc/impl/mask/TestUnmaskRange.java --- @@ -0,0 +1,165 @@ +package org.apache.orc.impl.mask; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.junit.Test; + +import java.nio.charset.StandardCharsets; + +import static org.junit.Assert.assertEquals; + +/** + * Test Unmask option + */ +public class TestUnmaskRange { + + public TestUnmaskRange() { + super(); + } + + /* Test for Long */ + @Test + public void testSimpleLongRangeMask() { + RedactMaskFactory mask = new RedactMaskFactory("9", "", "0:2"); + long result = mask.maskLong(123456); + assertEquals(123_999, result); + + // negative index + mask = new RedactMaskFactory("9", "", "-3:-1"); + result = mask.maskLong(123456); + assertEquals(999_456, result); + + // out of range mask, return the original mask + mask = new RedactMaskFactory("9", "", "7:10"); + result = mask.maskLong(123456); + assertEquals(999999, result); + + } + + @Test + public void testDefaultRangeMask() { + RedactMaskFactory mask = new RedactMaskFactory("9", "", ""); + long result = mask.maskLong(123456); + assertEquals(999999, result); + + mask = new RedactMaskFactory("9"); + result = mask.maskLong(123456); + assertEquals(999999, result); + + } + + @Test + public void testCCRangeMask() { + long cc = 4716885592186382L; + long maskedCC = 4716_77777777_6382L; + // Range unmask for first 4 and last 4 of credit card number + final RedactMaskFactory mask = new RedactMaskFactory("Xx7", "", "0:3,-4:-1"); + long result = mask.maskLong(cc); + + assertEquals(String.valueOf(cc).length(), String.valueOf(result).length()); + assertEquals(4716_77777777_6382L, result); + } + + /* Tests for Double */ + @Test + public void testSimpleDoubleRangeMask() { + RedactMaskFactory mask = new RedactMaskFactory("Xx7", "", "0:2"); + assertEquals(1237.77, mask.maskDouble(1234.99), 0.000001); + assertEquals(12377.7, mask.maskDouble(12345.9), 0.000001); + + mask = new RedactMaskFactory("Xx7", "", "-3:-1"); + assertEquals(7774.9, mask.maskDouble(1234.9), 0.000001); + + } + + /* test for String */ + @Test + public void testStringRangeMask() { + + BytesColumnVector source = new BytesColumnVector(); + BytesColumnVector target = new BytesColumnVector(); + target.reset(); + + byte[] input = "Mary had 1 little lamb!!".getBytes(StandardCharsets.UTF_8); + source.setRef(0, input, 0, input.length); + + // Set a 4 byte chinese character (U+2070E), which is letter other + input = "\uD841\uDF0E".getBytes(StandardCharsets.UTF_8); + source.setRef(1, input, 0, input.length); + + RedactMaskFactory mask = new RedactMaskFactory("", "", "0:3, -5:-1"); + for(int r=0; r < 2; ++r) { + mask.maskString(source, r, target); + } + + assertEquals("Mary xxx 9 xxxxxx xamb!!", new String(target.vector[0], + target.start[0], target.length[0], StandardCharsets.UTF_8)); + assertEquals("\uD841\uDF0E", new String(target.vector[1], + target.start[1], target.length[1], StandardCharsets.UTF_8)); + + // test defaults, no-unmask range + mask = new RedactMaskFactory(); + for(int r=0; r < 2; ++r) { + mask.maskString(source, r, target); + } + + assertEquals("Xxxx xxx 9 xxxxxx xxxx..", new String(target.vector[0], + target.start[0], target.length[0], StandardCharsets.UTF_8)); + assertEquals("ª", new String(target.vector[1], + target.start[1], target.length[1], StandardCharsets.UTF_8)); + + + // test out of range string mask + mask = new RedactMaskFactory("", "", "-1:-5"); + for(int r=0; r < 2; ++r) { + mask.maskString(source, r, target); + } + + assertEquals("Xxxx xxx 9 xxxxxx xxxx..", new String(target.vector[0], + target.start[0], target.length[0], StandardCharsets.UTF_8)); + assertEquals("ª", new String(target.vector[1], + target.start[1], target.length[1], StandardCharsets.UTF_8)); + + } + + /* test for Decimal */ + @Test + public void testDecimalRangeMask() { + + RedactMaskFactory mask = new RedactMaskFactory("Xx7", "", "0:3"); + assertEquals(new HiveDecimalWritable("123477.777"), + mask.maskDecimal(new HiveDecimalWritable("123456.789"))); + + // try with a reverse index + mask = new RedactMaskFactory("Xx7", "", "-3:-1, 0:3"); + assertEquals(new HiveDecimalWritable("123477777.777654"), + mask.maskDecimal(new HiveDecimalWritable("123456789.987654"))); + + // test removal of leading and trailing zeros. + /* + assertEquals(new HiveDecimalWritable("777777777777777777.7777"), + mask.maskDecimal(new HiveDecimalWritable("0123456789123456789.01230"))); + */ + --- End diff -- ok, will do. ---