lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Simon Willnauer <simon.willna...@googlemail.com>
Subject Re: svn commit: r1181104 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/index/values/TypePromoter.java test/org/apache/lucene/index/values/TestTypePromotion.java
Date Mon, 10 Oct 2011 19:30:00 GMT
ah crap! thanks robert!


On Mon, Oct 10, 2011 at 8:05 PM,  <rmuir@apache.org> wrote:
> Author: rmuir
> Date: Mon Oct 10 18:05:18 2011
> New Revision: 1181104
>
> URL: http://svn.apache.org/viewvc?rev=1181104&view=rev
> Log:
> LUCENE-3186: svn add
>
> Added:
>    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java   (with props)
>    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java   (with props)
>
> Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java?rev=1181104&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (added)
> +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/TypePromoter.java Mon Oct 10 18:05:18 2011
> @@ -0,0 +1,204 @@
> +package org.apache.lucene.index.values;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +import java.util.HashMap;
> +import java.util.Map;
> +
> +/**
> + * Type promoter that promotes {@link IndexDocValues} during merge based on
> + * their {@link ValueType} and {@link #getValueSize()}
> + *
> + * @lucene.internal
> + */
> +public class TypePromoter {
> +
> +  private final static Map<Integer, ValueType> FLAGS_MAP = new HashMap<Integer, ValueType>();
> +  private static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
> +  public static final int VAR_TYPE_VALUE_SIZE = -1;
> +
> +  private static final int IS_INT = 1 << 0;
> +  private static final int IS_BYTE = 1 << 1;
> +  private static final int IS_FLOAT = 1 << 2;
> +  /* VAR & FIXED == VAR */
> +  private static final int IS_VAR = 1 << 3;
> +  private static final int IS_FIXED = 1 << 3 | 1 << 4;
> +  /* if we have FIXED & FIXED with different size we promote to VAR */
> +  private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3);
> +  /* STRAIGHT & DEREF == STRAIGHT (dense values win) */
> +  private static final int IS_STRAIGHT = 1 << 5;
> +  private static final int IS_DEREF = 1 << 5 | 1 << 6;
> +  private static final int IS_SORTED = 1 << 7;
> +  /* more bits wins (int16 & int32 == int32) */
> +  private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
> +  private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
> +  private static final int IS_32_BIT = 1 << 10 | 1 << 11;
> +  private static final int IS_64_BIT = 1 << 11;
> +
> +  private final ValueType type;
> +  private final int flags;
> +  private final int valueSize;
> +
> +  /**
> +   * Returns a positive value size if this {@link TypePromoter} represents a
> +   * fixed variant, otherwise <code>-1</code>
> +   *
> +   * @return a positive value size if this {@link TypePromoter} represents a
> +   *         fixed variant, otherwise <code>-1</code>
> +   */
> +  public int getValueSize() {
> +    return valueSize;
> +  }
> +
> +  static {
> +    for (ValueType type : ValueType.values()) {
> +      TypePromoter create = create(type, VAR_TYPE_VALUE_SIZE);
> +      FLAGS_MAP.put(create.flags, type);
> +    }
> +  }
> +
> +  /**
> +   * Creates a new {@link TypePromoter}
> +   *
> +   * @param type
> +   *          the {@link ValueType} this promoter represents
> +   * @param flags
> +   *          the promoters flags
> +   * @param valueSize
> +   *          the value size if {@link #IS_FIXED} or <code>-1</code> otherwise.
> +   */
> +  protected TypePromoter(ValueType type, int flags, int valueSize) {
> +    this.type = type;
> +    this.flags = flags;
> +    this.valueSize = valueSize;
> +  }
> +
> +  /**
> +   * Creates a new promoted {@link TypePromoter} based on this and the given
> +   * {@link TypePromoter} or <code>null</code> iff the {@link TypePromoter}
> +   * aren't compatible.
> +   *
> +   * @param promoter
> +   *          the incoming promoter
> +   * @return a new promoted {@link TypePromoter} based on this and the given
> +   *         {@link TypePromoter} or <code>null</code> iff the
> +   *         {@link TypePromoter} aren't compatible.
> +   */
> +  public TypePromoter promote(TypePromoter promoter) {
> +
> +    int promotedFlags = promoter.flags & this.flags;
> +    TypePromoter promoted = create(FLAGS_MAP.get(promotedFlags), valueSize);
> +    if (promoted == null) {
> +      return promoted;
> +    }
> +    if ((promoted.flags & IS_BYTE) != 0 && (promoted.flags & IS_FIXED) == IS_FIXED) {
> +      if (this.valueSize == promoter.valueSize) {
> +        return promoted;
> +      }
> +      return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK),
> +          VAR_TYPE_VALUE_SIZE);
> +    }
> +    return promoted;
> +
> +  }
> +
> +  /**
> +   * Returns the {@link ValueType} of this {@link TypePromoter}
> +   *
> +   * @return the {@link ValueType} of this {@link TypePromoter}
> +   */
> +  public ValueType type() {
> +    return type;
> +  }
> +
> +  @Override
> +  public String toString() {
> +    return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
> +  }
> +
> +  /**
> +   * Creates a new {@link TypePromoter} for the given type and size per value.
> +   *
> +   * @param type
> +   *          the {@link ValueType} to create the promoter for
> +   * @param valueSize
> +   *          the size per value in bytes or <code>-1</code> iff the types have
> +   *          variable length.
> +   * @return a new {@link TypePromoter}
> +   */
> +  public static TypePromoter create(ValueType type, int valueSize) {
> +    if (type == null) {
> +      return null;
> +    }
> +    switch (type) {
> +    case BYTES_FIXED_DEREF:
> +      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
> +    case BYTES_FIXED_SORTED:
> +      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
> +    case BYTES_FIXED_STRAIGHT:
> +      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
> +    case BYTES_VAR_DEREF:
> +      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, VAR_TYPE_VALUE_SIZE);
> +    case BYTES_VAR_SORTED:
> +      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, VAR_TYPE_VALUE_SIZE);
> +    case BYTES_VAR_STRAIGHT:
> +      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
> +    case FIXED_INTS_16:
> +      return new TypePromoter(type,
> +          IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
> +    case FIXED_INTS_32:
> +      return new TypePromoter(type,
> +          IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
> +    case FIXED_INTS_64:
> +      return new TypePromoter(type,
> +          IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
> +    case FIXED_INTS_8:
> +      return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT,
> +          valueSize);
> +    case FLOAT_32:
> +      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
> +          | IS_32_BIT, valueSize);
> +    case FLOAT_64:
> +      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT
> +          | IS_64_BIT, valueSize);
> +    case VAR_INTS:
> +      return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, VAR_TYPE_VALUE_SIZE);
> +    default:
> +      throw new IllegalStateException();
> +    }
> +  }
> +
> +  /**
> +   * Returns a {@link TypePromoter} that always promotes to the type provided to
> +   * {@link #promote(TypePromoter)}
> +   */
> +  public static TypePromoter getIdentityPromoter() {
> +    return IDENTITY_PROMOTER;
> +  }
> +
> +  private static class IdentityTypePromoter extends TypePromoter {
> +
> +    public IdentityTypePromoter() {
> +      super(null, 0, -1);
> +    }
> +
> +    @Override
> +    public TypePromoter promote(TypePromoter promoter) {
> +      return promoter;
> +    }
> +  }
> +}
> \ No newline at end of file
>
> Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java?rev=1181104&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (added)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java Mon Oct 10 18:05:18 2011
> @@ -0,0 +1,313 @@
> +package org.apache.lucene.index.values;
> +
> +import java.io.IOException;
> +import java.util.EnumSet;
> +import java.util.Random;
> +
> +import org.apache.lucene.analysis.MockAnalyzer;
> +import org.apache.lucene.document.Document;
> +import org.apache.lucene.document.Field;
> +import org.apache.lucene.document.IndexDocValuesField;
> +import org.apache.lucene.document.TextField;
> +import org.apache.lucene.index.CorruptIndexException;
> +import org.apache.lucene.index.IndexReader;
> +import org.apache.lucene.index.IndexReader.ReaderContext;
> +import org.apache.lucene.index.IndexWriter;
> +import org.apache.lucene.index.IndexWriterConfig;
> +import org.apache.lucene.index.NoMergePolicy;
> +import org.apache.lucene.index.codecs.CodecProvider;
> +import org.apache.lucene.index.values.IndexDocValues.Source;
> +import org.apache.lucene.store.Directory;
> +import org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.LuceneTestCase;
> +import org.junit.Before;
> +
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with this
> + * work for additional information regarding copyright ownership. The ASF
> + * licenses this file to You under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
> + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
> + * License for the specific language governing permissions and limitations under
> + * the License.
> + */
> +public class TestTypePromotion extends LuceneTestCase {
> +  @Before
> +  public void setUp() throws Exception {
> +    super.setUp();
> +    assumeFalse("cannot work with preflex codec", CodecProvider.getDefault()
> +        .getDefaultFieldCodec().equals("PreFlex"));
> +  }
> +
> +  private static EnumSet<ValueType> INTEGERS = EnumSet.of(ValueType.VAR_INTS,
> +      ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
> +      ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8);
> +
> +  private static EnumSet<ValueType> FLOATS = EnumSet.of(ValueType.FLOAT_32,
> +      ValueType.FLOAT_64);
> +
> +  private static EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of(
> +      ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT,
> +      ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF);
> +
> +  private static EnumSet<ValueType> SORTED_BYTES = EnumSet.of(
> +      ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
> +
> +  public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random random) {
> +    ValueType[] array = typeEnum.toArray(new ValueType[0]);
> +    return array[random.nextInt(array.length)];
> +  }
> +
> +  private static enum TestType {
> +    Int, Float, Byte
> +  }
> +
> +  private void runTest(EnumSet<ValueType> types, TestType type)
> +      throws CorruptIndexException, IOException {
> +    Directory dir = newDirectory();
> +    IndexWriter writer = new IndexWriter(dir,
> +        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> +    int num_1 = atLeast(200);
> +    int num_2 = atLeast(200);
> +    int num_3 = atLeast(200);
> +    long[] values = new long[num_1 + num_2 + num_3];
> +    index(writer, new IndexDocValuesField("promote"),
> +        randomValueType(types, random), values, 0, num_1);
> +    writer.commit();
> +
> +    index(writer, new IndexDocValuesField("promote"),
> +        randomValueType(types, random), values, num_1, num_2);
> +    writer.commit();
> +
> +    if (random.nextInt(4) == 0) {
> +      // once in a while use addIndexes
> +      writer.optimize();
> +
> +      Directory dir_2 = newDirectory() ;
> +      IndexWriter writer_2 = new IndexWriter(dir_2,
> +          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> +      index(writer_2, new IndexDocValuesField("promote"),
> +          randomValueType(types, random), values, num_1 + num_2, num_3);
> +      writer_2.commit();
> +      writer_2.close();
> +      if (random.nextBoolean()) {
> +        writer.addIndexes(dir_2);
> +      } else {
> +        // do a real merge here
> +        IndexReader open = IndexReader.open(dir_2);
> +        writer.addIndexes(open);
> +        open.close();
> +      }
> +      dir_2.close();
> +    } else {
> +      index(writer, new IndexDocValuesField("promote"),
> +          randomValueType(types, random), values, num_1 + num_2, num_3);
> +    }
> +
> +    writer.optimize();
> +    writer.close();
> +    assertValues(type, dir, values);
> +    dir.close();
> +  }
> +
> +  private void assertValues(TestType type, Directory dir, long[] values)
> +      throws CorruptIndexException, IOException {
> +    IndexReader reader = IndexReader.open(dir);
> +    assertTrue(reader.isOptimized());
> +    ReaderContext topReaderContext = reader.getTopReaderContext();
> +    ReaderContext[] children = topReaderContext.children();
> +    IndexDocValues docValues = children[0].reader.docValues("promote");
> +    assertEquals(1, children.length);
> +    Source directSource = docValues.getDirectSource();
> +    for (int i = 0; i < values.length; i++) {
> +      int id = Integer.parseInt(reader.document(i).get("id"));
> +      String msg = "id: " + id + " doc: " + i;
> +      switch (type) {
> +      case Byte:
> +        BytesRef bytes = directSource.getBytes(i, new BytesRef());
> +        long value = 0;
> +        switch(bytes.length) {
> +        case 1:
> +          value = bytes.bytes[bytes.offset];
> +          break;
> +        case 2:
> +          value = bytes.asShort();
> +          break;
> +        case 4:
> +          value = bytes.asInt();
> +          break;
> +        case 8:
> +          value = bytes.asLong();
> +          break;
> +
> +        default:
> +          fail(msg + " bytessize: " + bytes.length);
> +        }
> +
> +        assertEquals(msg  + " byteSize: " + bytes.length, values[id], value);
> +        break;
> +      case Float:
> +          assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i)));
> +        break;
> +      case Int:
> +        assertEquals(msg, values[id], directSource.getInt(i));
> +      default:
> +        break;
> +      }
> +
> +    }
> +    docValues.close();
> +    reader.close();
> +  }
> +
> +  public void index(IndexWriter writer, IndexDocValuesField valField,
> +      ValueType valueType, long[] values, int offset, int num)
> +      throws CorruptIndexException, IOException {
> +    BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 });
> +    for (int i = offset; i < offset + num; i++) {
> +      Document doc = new Document();
> +      doc.add(new Field("id", i + "", TextField.TYPE_STORED));
> +      switch (valueType) {
> +      case VAR_INTS:
> +        values[i] = random.nextInt();
> +        valField.setInt(values[i]);
> +        break;
> +      case FIXED_INTS_16:
> +        values[i] = random.nextInt(Short.MAX_VALUE);
> +        valField.setInt((short) values[i], true);
> +        break;
> +      case FIXED_INTS_32:
> +        values[i] = random.nextInt();
> +        valField.setInt((int) values[i], true);
> +        break;
> +      case FIXED_INTS_64:
> +        values[i] = random.nextLong();
> +        valField.setInt(values[i], true);
> +        break;
> +      case FLOAT_64:
> +        double nextDouble = random.nextDouble();
> +        values[i] = Double.doubleToRawLongBits(nextDouble);
> +        valField.setFloat(nextDouble);
> +        break;
> +      case FLOAT_32:
> +        final float nextFloat = random.nextFloat();
> +        values[i] = Double.doubleToRawLongBits(nextFloat);
> +        valField.setFloat(nextFloat);
> +        break;
> +      case FIXED_INTS_8:
> +         values[i] = (byte) i;
> +        valField.setInt((byte)values[i], true);
> +        break;
> +      case BYTES_FIXED_DEREF:
> +      case BYTES_FIXED_SORTED:
> +      case BYTES_FIXED_STRAIGHT:
> +        values[i] = random.nextLong();
> +        ref.copy(values[i]);
> +        valField.setBytes(ref, valueType);
> +        break;
> +      case BYTES_VAR_DEREF:
> +      case BYTES_VAR_SORTED:
> +      case BYTES_VAR_STRAIGHT:
> +        if (random.nextBoolean()) {
> +          ref.copy(random.nextInt());
> +          values[i] = ref.asInt();
> +        } else {
> +          ref.copy(random.nextLong());
> +          values[i] = ref.asLong();
> +        }
> +        valField.setBytes(ref, valueType);
> +        break;
> +
> +      default:
> +        fail("unexpected value " + valueType);
> +
> +      }
> +      doc.add(valField);
> +      writer.addDocument(doc);
> +      if (random.nextInt(10) == 0) {
> +        writer.commit();
> +      }
> +    }
> +  }
> +
> +  public void testPromoteBytes() throws IOException {
> +    runTest(UNSORTED_BYTES, TestType.Byte);
> +  }
> +
> +  public void testSortedPromoteBytes() throws IOException {
> +    runTest(SORTED_BYTES, TestType.Byte);
> +  }
> +
> +  public void testPromotInteger() throws IOException {
> +    runTest(INTEGERS, TestType.Int);
> +  }
> +
> +  public void testPromotFloatingPoint() throws CorruptIndexException,
> +      IOException {
> +    runTest(FLOATS, TestType.Float);
> +  }
> +
> +  public void testMergeIncompatibleTypes() throws IOException {
> +    Directory dir = newDirectory();
> +    IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
> +    writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
> +    IndexWriter writer = new IndexWriter(dir, writerConfig);
> +    int num_1 = atLeast(200);
> +    int num_2 = atLeast(200);
> +    long[] values = new long[num_1 + num_2];
> +    index(writer, new IndexDocValuesField("promote"),
> +        randomValueType(INTEGERS, random), values, 0, num_1);
> +    writer.commit();
> +
> +    if (random.nextInt(4) == 0) {
> +      // once in a while use addIndexes
> +      Directory dir_2 = newDirectory() ;
> +      IndexWriter writer_2 = new IndexWriter(dir_2,
> +          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
> +      index(writer_2, new IndexDocValuesField("promote"),
> +          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
> +      writer_2.commit();
> +      writer_2.close();
> +      if (random.nextBoolean()) {
> +        writer.addIndexes(dir_2);
> +      } else {
> +        // do a real merge here
> +        IndexReader open = IndexReader.open(dir_2);
> +        writer.addIndexes(open);
> +        open.close();
> +      }
> +      dir_2.close();
> +    } else {
> +      index(writer, new IndexDocValuesField("promote"),
> +          randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2);
> +      writer.commit();
> +    }
> +    writer.close();
> +    writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
> +    if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
> +      writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we optimize to one segment (merge everything together)
> +    }
> +    writer = new IndexWriter(dir, writerConfig);
> +    // now optimize
> +    writer.optimize();
> +    writer.close();
> +    IndexReader reader = IndexReader.open(dir);
> +    assertTrue(reader.isOptimized());
> +    ReaderContext topReaderContext = reader.getTopReaderContext();
> +    ReaderContext[] children = topReaderContext.children();
> +    IndexDocValues docValues = children[0].reader.docValues("promote");
> +    assertNotNull(docValues);
> +    assertValues(TestType.Byte, dir, values);
> +    assertEquals(ValueType.BYTES_VAR_STRAIGHT, docValues.type());
> +    reader.close();
> +    dir.close();
> +  }
> +
> +}
> \ No newline at end of file
>
>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Mime
View raw message