Return-Path: X-Original-To: apmail-lucene-dev-archive@www.apache.org Delivered-To: apmail-lucene-dev-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 3BD18D463 for ; Mon, 19 Nov 2012 20:43:21 +0000 (UTC) Received: (qmail 4630 invoked by uid 500); 19 Nov 2012 20:43:20 -0000 Delivered-To: apmail-lucene-dev-archive@lucene.apache.org Received: (qmail 4575 invoked by uid 500); 19 Nov 2012 20:43:20 -0000 Mailing-List: contact dev-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list dev@lucene.apache.org Received: (qmail 4568 invoked by uid 99); 19 Nov 2012 20:43:20 -0000 Received: from minotaur.apache.org (HELO minotaur.apache.org) (140.211.11.9) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 19 Nov 2012 20:43:20 +0000 Received: from localhost (HELO mail-qc0-f181.google.com) (127.0.0.1) (smtp-auth username rmuir, mechanism plain) by minotaur.apache.org (qpsmtpd/0.29) with ESMTP; Mon, 19 Nov 2012 20:43:19 +0000 Received: by mail-qc0-f181.google.com with SMTP id x40so3777980qcp.12 for ; Mon, 19 Nov 2012 12:43:18 -0800 (PST) Received: by 10.229.78.157 with SMTP id l29mr3101947qck.9.1353357798340; Mon, 19 Nov 2012 12:43:18 -0800 (PST) MIME-Version: 1.0 Received: by 10.49.12.193 with HTTP; Mon, 19 Nov 2012 12:42:58 -0800 (PST) In-Reply-To: References: <20121119201931.7546E23888CD@eris.apache.org> From: Robert Muir Date: Mon, 19 Nov 2012 15:42:58 -0500 Message-ID: Subject: Re: svn commit: r1411392 - /lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java To: simon.willnauer@gmail.com Cc: dev@lucene.apache.org Content-Type: multipart/alternative; boundary=00235429dfe4d2f85104cedf2ac2 --00235429dfe4d2f85104cedf2ac2 Content-Type: text/plain; charset=UTF-8 It should actually be enabled for 4.1 as well. 
the hack is just a failsafe, to ensure we dont somehow lose ALL docvalues fields and silently skip all tests... currently this one found a bug in the default sorted bytes merging though.... On Mon, Nov 19, 2012 at 3:34 PM, Simon Willnauer wrote: > On Mon, Nov 19, 2012 at 9:19 PM, wrote: > > Author: rmuir > > Date: Mon Nov 19 20:19:30 2012 > > New Revision: 1411392 > > > > URL: http://svn.apache.org/viewvc?rev=1411392&view=rev > > Log: > > checkindex > > > > Modified: > > > lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java > > > > Modified: > lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java > > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1411392&r1=1411391&r2=1411392&view=diff > > > ============================================================================== > > --- > lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java > (original) > > +++ > lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java > Mon Nov 19 20:19:30 2012 > > @@ -1341,6 +1341,15 @@ public class CheckIndex { > > status.totalValueFields++; > > final DocValues docValues = reader.docValues(fieldInfo.name); > > checkDocValues(docValues, fieldInfo.name, > fieldInfo.getDocValuesType(), reader.maxDoc()); > > + // nocommit hack hack hack > > + if (reader.core.simpleDVProducer != null) { > > + checkSimpleDocValues(fieldInfo, reader); > > + } else { > > + // hack hack hack > > + if (info.info.getCodec().getName().equals("SimpleText")) { > > + throw new RuntimeException("docvalues lost for field: " + > fieldInfo + "!!!!"); > > you can enable this for 4.1 too now might give more coverage! 
> > simon > > + } > > + } > > } else { > > if (reader.docValues(fieldInfo.name) != null) { > > throw new RuntimeException("field: " + fieldInfo.name + " > has docvalues but should omit them!"); > > @@ -1358,6 +1367,74 @@ public class CheckIndex { > > } > > return status; > > } > > + > > + private void checkBinaryDocValues(FieldInfo fi, SegmentReader reader, > BinaryDocValues dv) { > > + final boolean fixed = dv.isFixedLength(); > > + final int maxLength = dv.maxLength(); > > + boolean fixed2 = true; > > + int maxLength2 = -1; > > + BytesRef scratch = new BytesRef(); > > + for (int i = 0; i < reader.maxDoc(); i++) { > > + dv.get(i, scratch); > > + if (maxLength2 == -1) { > > + maxLength2 = scratch.length; > > + } else { > > + fixed2 &= scratch.length == maxLength2; > > + maxLength2 = Math.max(maxLength2, scratch.length); > > + } > > + } > > + if (fixed != fixed2) { > > + throw new RuntimeException("dv for field: " + fi.name + " > reports fixed=" + fixed + " but this is not the case!"); > > + } > > + if (maxLength != maxLength2) { > > + throw new RuntimeException("dv for field: " + fi.name + " > reports maxLength=" + maxLength + " but this is not the case: " + > maxLength2); > > + } > > + } > > + > > + private void checkNumericDocValues(FieldInfo fi, SegmentReader > reader, NumericDocValues ndv) { > > + final long minValue = ndv.minValue(); > > + final long maxValue = ndv.maxValue(); > > + long minValue2 = Long.MAX_VALUE; > > + long maxValue2 = Long.MIN_VALUE; > > + for (int i = 0; i < reader.maxDoc(); i++) { > > + long value = ndv.get(i); > > + minValue2 = Math.min(minValue2, value); > > + maxValue2 = Math.max(maxValue2, value); > > + } > > + if (minValue != minValue2) { > > + throw new RuntimeException("dv for field: " + fi.name + " > reports minValue=" + minValue + " but this is not the case: " + minValue2); > > + } > > + if (maxValue != maxValue2) { > > + throw new RuntimeException("dv for field: " + fi.name + " > reports maxValue=" + maxValue + " but this is not 
the case: " + maxValue2); > > + } > > + } > > + > > + // nocommit > > + private void checkSimpleDocValues(FieldInfo fi, SegmentReader reader) > throws Exception { > > + switch(fi.getDocValuesType()) { > > + case BYTES_FIXED_SORTED: > > + case BYTES_VAR_SORTED: > > + case BYTES_FIXED_DEREF: > > + case BYTES_VAR_DEREF: > > + checkBinaryDocValues(fi, reader, reader.getSortedDocValues( > fi.name)); > > + break; > > + case BYTES_FIXED_STRAIGHT: > > + case BYTES_VAR_STRAIGHT: > > + checkBinaryDocValues(fi, reader, reader.getBinaryDocValues( > fi.name)); > > + break; > > + case FLOAT_32: > > + case FLOAT_64: > > + case VAR_INTS: > > + case FIXED_INTS_16: > > + case FIXED_INTS_32: > > + case FIXED_INTS_64: > > + case FIXED_INTS_8: > > + checkNumericDocValues(fi, reader, reader.getNumericDocValues( > fi.name)); > > + break; > > + default: > > + throw new AssertionError(); > > + } > > + } > > > > /** > > * Test term vectors for a segment. > > > > > --00235429dfe4d2f85104cedf2ac2 Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: quoted-printable It should actually be enabled for 4.1 as well.

The hack is just a failsafe, to ensure we don't somehow lose ALL docvalues fields and silently skip all tests...

Currently this one found a bug in the default sorted bytes merging though....

On Mon, Nov 19, 2012 at 3:34 PM, Simon Willnauer <simon.willnauer@gmail.com> wrote:
On Mon, Nov 19, 2012 at 9:19 PM, <rmuir@apache.org> wrote:
> Author: rmuir
> Date: Mon Nov 19 20:19:30 2012
> New Revision: 1411392
>
> URL: http://svn.apache.org/viewvc?rev=1411392&view=rev
> Log:
> checkindex
>
> Modified:
>     lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
>
> Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
> URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1411392&r1=1411391&r2=1411392&view=diff
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D
> --- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/luc= ene/index/CheckIndex.java (original)
> +++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/luc= ene/index/CheckIndex.java Mon Nov 19 20:19:30 2012
> @@ -1341,6 +1341,15 @@ public class CheckIndex {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0status.totalValueFields++; > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0final DocValues docValues =3D= reader.docValues(fieldInfo.name);
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0checkDocValues(docValues, fie= ldInfo.name, fieldInfo.getDocValuesType(), reader.maxDoc());
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0// nocommit hack hack hack
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0if (reader.core.simpleDVProducer != =3D null) {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0checkSimpleDocValues(fieldI= nfo, reader);
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0} else {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0// hack hack hack
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0if (info.info.getCodec().ge= tName().equals("SimpleText")) {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0throw new RuntimeExc= eption("docvalues lost for field: " + fieldInfo + "!!!!"= ;);

You can enable this for 4.1 too now; it might give more coverage!
simon
> + =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0}
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0}
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0} else {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0if (reader.docValues(fieldInf= o.name) !=3D null) {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0throw new RuntimeExcep= tion("field: " + fieldInfo.name + " has docvalues but should= omit them!");
> @@ -1358,6 +1367,74 @@ public class CheckIndex {
> =C2=A0 =C2=A0 =C2=A0}
> =C2=A0 =C2=A0 =C2=A0return status;
> =C2=A0 =C2=A0}
> +
> + =C2=A0private void checkBinaryDocValues(FieldInfo fi, SegmentReader = reader, BinaryDocValues dv) {
> + =C2=A0 =C2=A0final boolean fixed =3D dv.isFixedLength();
> + =C2=A0 =C2=A0final int maxLength =3D dv.maxLength();
> + =C2=A0 =C2=A0boolean fixed2 =3D true;
> + =C2=A0 =C2=A0int maxLength2 =3D -1;
> + =C2=A0 =C2=A0BytesRef scratch =3D new BytesRef();
> + =C2=A0 =C2=A0for (int i =3D 0; i < reader.maxDoc(); i++) {
> + =C2=A0 =C2=A0 =C2=A0dv.get(i, scratch);
> + =C2=A0 =C2=A0 =C2=A0if (maxLength2 =3D=3D -1) {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0maxLength2 =3D scratch.length;
> + =C2=A0 =C2=A0 =C2=A0} else {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0fixed2 &=3D scratch.length =3D=3D max= Length2;
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0maxLength2 =3D Math.max(maxLength2, scrat= ch.length);
> + =C2=A0 =C2=A0 =C2=A0}
> + =C2=A0 =C2=A0}
> + =C2=A0 =C2=A0if (fixed !=3D fixed2) {
> + =C2=A0 =C2=A0 =C2=A0throw new RuntimeException("dv for field: &= quot; + fi.name + " r= eports fixed=3D" + fixed + " but this is not the case!"); > + =C2=A0 =C2=A0}
> + =C2=A0 =C2=A0if (maxLength !=3D maxLength2) {
> + =C2=A0 =C2=A0 =C2=A0throw new RuntimeException("dv for field: &= quot; + fi.name + " r= eports maxLength=3D" + maxLength + " but this is not the case: &q= uot; + maxLength2);
> + =C2=A0 =C2=A0}
> + =C2=A0}
> +
> + =C2=A0private void checkNumericDocValues(FieldInfo fi, SegmentReader= reader, NumericDocValues ndv) {
> + =C2=A0 =C2=A0final long minValue =3D ndv.minValue();
> + =C2=A0 =C2=A0final long maxValue =3D ndv.maxValue();
> + =C2=A0 =C2=A0long minValue2 =3D Long.MAX_VALUE;
> + =C2=A0 =C2=A0long maxValue2 =3D Long.MIN_VALUE;
> + =C2=A0 =C2=A0for (int i =3D 0; i < reader.maxDoc(); i++) {
> + =C2=A0 =C2=A0 =C2=A0long value =3D ndv.get(i);
> + =C2=A0 =C2=A0 =C2=A0minValue2 =3D Math.min(minValue2, value);
> + =C2=A0 =C2=A0 =C2=A0maxValue2 =3D Math.max(maxValue2, value);
> + =C2=A0 =C2=A0}
> + =C2=A0 =C2=A0if (minValue !=3D minValue2) {
> + =C2=A0 =C2=A0 =C2=A0throw new RuntimeException("dv for field: &= quot; + fi.name + " r= eports minValue=3D" + minValue + " but this is not the case: &quo= t; + minValue2);
> + =C2=A0 =C2=A0}
> + =C2=A0 =C2=A0if (maxValue !=3D maxValue2) {
> + =C2=A0 =C2=A0 =C2=A0throw new RuntimeException("dv for field: &= quot; + fi.name + " r= eports maxValue=3D" + maxValue + " but this is not the case: &quo= t; + maxValue2);
> + =C2=A0 =C2=A0}
> + =C2=A0}
> +
> + =C2=A0// nocommit
> + =C2=A0private void checkSimpleDocValues(FieldInfo fi, SegmentReader = reader) throws Exception {
> + =C2=A0 =C2=A0switch(fi.getDocValuesType()) {
> + =C2=A0 =C2=A0 =C2=A0case BYTES_FIXED_SORTED:
> + =C2=A0 =C2=A0 =C2=A0case BYTES_VAR_SORTED:
> + =C2=A0 =C2=A0 =C2=A0case BYTES_FIXED_DEREF:
> + =C2=A0 =C2=A0 =C2=A0case BYTES_VAR_DEREF:
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0checkBinaryDocValues(fi, reader, reader.g= etSortedDocValues(fi.name)= );
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0break;
> + =C2=A0 =C2=A0 =C2=A0case BYTES_FIXED_STRAIGHT:
> + =C2=A0 =C2=A0 =C2=A0case BYTES_VAR_STRAIGHT:
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0checkBinaryDocValues(fi, reader, reader.g= etBinaryDocValues(fi.name)= );
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0break;
> + =C2=A0 =C2=A0 =C2=A0case FLOAT_32:
> + =C2=A0 =C2=A0 =C2=A0case FLOAT_64:
> + =C2=A0 =C2=A0 =C2=A0case VAR_INTS:
> + =C2=A0 =C2=A0 =C2=A0case FIXED_INTS_16:
> + =C2=A0 =C2=A0 =C2=A0case FIXED_INTS_32:
> + =C2=A0 =C2=A0 =C2=A0case FIXED_INTS_64:
> + =C2=A0 =C2=A0 =C2=A0case FIXED_INTS_8:
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0checkNumericDocValues(fi, reader, reader.= getNumericDocValues(fi.name));
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0break;
> + =C2=A0 =C2=A0 =C2=A0default:
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0throw new AssertionError();
> + =C2=A0 =C2=A0}
> + =C2=A0}
>
> =C2=A0 =C2=A0/**
> =C2=A0 =C2=A0 * Test term vectors for a segment.
>
>

--00235429dfe4d2f85104cedf2ac2--