orc-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From moresandeep <...@git.apache.org>
Subject [GitHub] orc pull request #184: Orc 256 unmask range option
Date Thu, 07 Dec 2017 20:16:56 GMT
Github user moresandeep commented on a diff in the pull request:

    https://github.com/apache/orc/pull/184#discussion_r155629181
  
    --- Diff: java/core/src/test/org/apache/orc/impl/mask/TestUnmaskRange.java ---
    @@ -0,0 +1,165 @@
    +package org.apache.orc.impl.mask;
    +
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with this
    + * work for additional information regarding copyright ownership.  The ASF
    + * licenses this file to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance with the License.
    + * You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
    + * License for the specific language governing permissions and limitations under
    + * the License.
    + */
    +
    +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    +import org.junit.Test;
    +
    +import java.nio.charset.StandardCharsets;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +/**
    + * Test Unmask option
    + */
    +public class TestUnmaskRange {
    +
    +  public TestUnmaskRange() {
    +    super();
    +  }
    +
    +  /* Test for Long */
    +  @Test
    +  public void testSimpleLongRangeMask() {
    +    RedactMaskFactory mask = new RedactMaskFactory("9", "", "0:2");
    +    long result = mask.maskLong(123456);
    +    assertEquals(123_999, result);
    +
    +    // negative index
    +    mask = new RedactMaskFactory("9", "", "-3:-1");
    +    result = mask.maskLong(123456);
    +    assertEquals(999_456, result);
    +
    +    // out-of-range unmask range: nothing is unmasked, so the value is fully masked
    +    mask = new RedactMaskFactory("9", "", "7:10");
    +    result = mask.maskLong(123456);
    +    assertEquals(999999, result);
    +
    +  }
    +
    +  @Test
    +  public void testDefaultRangeMask() {
    +    RedactMaskFactory mask = new RedactMaskFactory("9", "", "");
    +    long result = mask.maskLong(123456);
    +    assertEquals(999999, result);
    +
    +    mask = new RedactMaskFactory("9");
    +    result = mask.maskLong(123456);
    +    assertEquals(999999, result);
    +
    +  }
    +
    +  @Test
    +  public void testCCRangeMask() {
    +    long cc = 4716885592186382L;
    +    long maskedCC = 4716_77777777_6382L;
    +    // Range unmask for first 4 and last 4 of credit card number
    +    final RedactMaskFactory mask = new RedactMaskFactory("Xx7", "", "0:3,-4:-1");
    +    long result = mask.maskLong(cc);
    +
    +    assertEquals(String.valueOf(cc).length(), String.valueOf(result).length());
    +    assertEquals(4716_77777777_6382L, result);
    +  }
    +
    +  /* Tests for Double */
    +  @Test
    +  public void testSimpleDoubleRangeMask() {
    +    RedactMaskFactory mask = new RedactMaskFactory("Xx7", "", "0:2");
    +    assertEquals(1237.77, mask.maskDouble(1234.99), 0.000001);
    +    assertEquals(12377.7, mask.maskDouble(12345.9), 0.000001);
    +
    +    mask = new RedactMaskFactory("Xx7", "", "-3:-1");
    +    assertEquals(7774.9, mask.maskDouble(1234.9), 0.000001);
    +
    +  }
    +
    +  /* test for String */
    +  @Test
    +  public void testStringRangeMask() {
    +
    +    BytesColumnVector source = new BytesColumnVector();
    +    BytesColumnVector target = new BytesColumnVector();
    +    target.reset();
    +
    +    byte[] input = "Mary had 1 little lamb!!".getBytes(StandardCharsets.UTF_8);
    +    source.setRef(0, input, 0, input.length);
    +
    +    // Set a 4 byte chinese character (U+2070E), which is letter other
    +    input = "\uD841\uDF0E".getBytes(StandardCharsets.UTF_8);
    +    source.setRef(1, input, 0, input.length);
    +
    +    RedactMaskFactory mask = new RedactMaskFactory("", "", "0:3, -5:-1");
    +    for(int r=0; r < 2; ++r) {
    +      mask.maskString(source, r, target);
    +    }
    +
    +    assertEquals("Mary xxx 9 xxxxxx xamb!!", new String(target.vector[0],
    +        target.start[0], target.length[0], StandardCharsets.UTF_8));
    +    assertEquals("\uD841\uDF0E", new String(target.vector[1],
    +        target.start[1], target.length[1], StandardCharsets.UTF_8));
    +
    +    // test defaults, no-unmask range
    +    mask = new RedactMaskFactory();
    +    for(int r=0; r < 2; ++r) {
    +      mask.maskString(source, r, target);
    +    }
    +
    +    assertEquals("Xxxx xxx 9 xxxxxx xxxx..", new String(target.vector[0],
    +        target.start[0], target.length[0], StandardCharsets.UTF_8));
    +    assertEquals("ª", new String(target.vector[1],
    +        target.start[1], target.length[1], StandardCharsets.UTF_8));
    +
    +
    +    // test out of range string mask
    +    mask = new RedactMaskFactory("", "", "-1:-5");
    +    for(int r=0; r < 2; ++r) {
    +      mask.maskString(source, r, target);
    +    }
    +
    +    assertEquals("Xxxx xxx 9 xxxxxx xxxx..", new String(target.vector[0],
    +        target.start[0], target.length[0], StandardCharsets.UTF_8));
    +    assertEquals("ª", new String(target.vector[1],
    +        target.start[1], target.length[1], StandardCharsets.UTF_8));
    +
    +  }
    +
    +  /* test for Decimal */
    +  @Test
    +  public void testDecimalRangeMask() {
    +
    +    RedactMaskFactory mask = new RedactMaskFactory("Xx7", "", "0:3");
    +    assertEquals(new HiveDecimalWritable("123477.777"),
    +        mask.maskDecimal(new HiveDecimalWritable("123456.789")));
    +
    +    // try with a reverse index
    +    mask = new RedactMaskFactory("Xx7", "", "-3:-1, 0:3");
    +    assertEquals(new HiveDecimalWritable("123477777.777654"),
    +        mask.maskDecimal(new HiveDecimalWritable("123456789.987654")));
    +
    +    // test removal of leading and trailing zeros.
    +    /*
    +    assertEquals(new HiveDecimalWritable("777777777777777777.7777"),
    +        mask.maskDecimal(new HiveDecimalWritable("0123456789123456789.01230")));
    +        */
    +
    --- End diff --
    
    ok, will do.


---

Mime
View raw message