lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dougs...@apache.org
Subject svn commit: r798995 [22/35] - in /incubator/lucene.net/trunk/C#/src: Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/ Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/ Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net...
Date Wed, 29 Jul 2009 18:04:24 GMT
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSet.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/OpenBitSet.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSet.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSet.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,841 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using DocIdSet = Lucene.Net.Search.DocIdSet;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+    /// <summary>
+    /// An "open" BitSet implementation that allows direct access to the array of words
+    /// storing the bits.
+    /// <p/>
+    /// Unlike java.util.bitset, the fact that bits are packed into an array of longs
+    /// is part of the interface.  This allows efficient implementation of other algorithms
+    /// by someone other than the author.  It also allows one to efficiently implement
+    /// alternate serialization or interchange formats.
+    /// <p/>
+    /// <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most operations
+    /// and *much* faster at calculating cardinality of sets and results of set operations.
+    /// It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+    /// <p/>
+    /// The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
+    /// maximum code reuse.  Extra safety and encapsulation
+    /// may always be built on top, but if that's built in, the cost can never be removed (and
+    /// hence people re-implement their own version in order to get better performance).
+    /// If you want a "safe", totally encapsulated (and slower and limited) BitSet
+    /// class, use <code>java.util.BitSet</code>.
+    /// <p/>
+    /// <h3>Performance Results</h3>
+    /// </summary>
+    [Serializable]
+    public class OpenBitSet : DocIdSet, System.ICloneable
+    {
+        protected long[] bits;
+        protected int wlen;   // number of words (elements) used in the array
+
+        /** Constructs an OpenBitSet large enough to hold numBits.
+         *
+         * @param numBits
+         */
+        public OpenBitSet(long numBits)
+        {
+            bits = new long[bits2words(numBits)];
+            wlen = bits.Length;
+        }
+
+        public OpenBitSet()
+            : this(64)
+        {
+        }
+
+        /** Constructs an OpenBitSet from an existing long[].
+         * <br/>
+         * The first 64 bits are in long[0],
+         * with bit index 0 at the least significant bit, and bit index 63 at the most significant.
+         * Given a bit index,
+         * the word containing it is long[index/64], and it is at bit number index%64 within that word.
+         * <p>
+         * numWords are the number of elements in the array that contain
+         * set bits (non-zero longs).
+         * numWords should be &lt= bits.length, and
+         * any existing words in the array at position &gt= numWords should be zero.
+         *
+         */
+        public OpenBitSet(long[] bits, int numWords)
+        {
+            this.bits = bits;
+            this.wlen = numWords;
+        }
+
+        public override DocIdSetIterator Iterator()
+        {
+            return new OpenBitSetIterator(bits, wlen);
+        }
+
+        /** Returns the current capacity in bits (1 greater than the index of the last bit) */
+        public long Capacity() { return bits.Length << 6; }
+
+        /**
+         * Returns the current capacity of this set.  Included for
+         * compatibility.  This is *not* equal to {@link #cardinality}
+         */
+        public long Size()
+        {
+            return Capacity();
+        }
+
+        /** Returns true if there are no set bits */
+        public bool IsEmpty() { return Cardinality() == 0; }
+
+        /** Expert: returns the long[] storing the bits */
+        public long[] GetBits() { return bits; }
+
+        /** Expert: sets a new long[] to use as the bit storage */
+        public void SetBits(long[] bits) { this.bits = bits; }
+
+        /** Expert: gets the number of longs in the array that are in use */
+        public int GetNumWords() { return wlen; }
+
+        /** Expert: sets the number of longs in the array that are in use */
+        public void SetNumWords(int nWords) { this.wlen = nWords; }
+
+        /** Returns true or false for the specified bit index. */
+        public bool Get(int index)
+        {
+            int i = index >> 6;               // div 64
+            // signed shift will keep a negative index and force an
+            // array-index-out-of-bounds-exception, removing the need for an explicit check.
+            if (i >= bits.Length) return false;
+
+            int bit = index & 0x3f;           // mod 64
+            long bitmask = 1L << bit;
+            return (bits[i] & bitmask) != 0;
+        }
+
+
+        /** Returns true or false for the specified bit index.
+          * The index should be less than the OpenBitSet size
+          */
+        public bool FastGet(int index)
+        {
+            int i = index >> 6;               // div 64
+            // signed shift will keep a negative index and force an
+            // array-index-out-of-bounds-exception, removing the need for an explicit check.
+            int bit = index & 0x3f;           // mod 64
+            long bitmask = 1L << bit;
+            return (bits[i] & bitmask) != 0;
+        }
+
+        /** Returns true or false for the specified bit index
+         */
+        public bool Get(long index)
+        {
+            int i = (int)(index >> 6);             // div 64
+            if (i >= bits.Length) return false;
+            int bit = (int)index & 0x3f;           // mod 64
+            long bitmask = 1L << bit;
+            return (bits[i] & bitmask) != 0;
+        }
+
+        /** Returns true or false for the specified bit index.
+         * The index should be less than the OpenBitSet size.
+         */
+        public bool FastGet(long index)
+        {
+            int i = (int)(index >> 6);               // div 64
+            int bit = (int)index & 0x3f;           // mod 64
+            long bitmask = 1L << bit;
+            return (bits[i] & bitmask) != 0;
+        }
+
+        /*
+        // alternate implementation of get()
+        public bool get1(int index) {
+          int i = index >> 6;                // div 64
+          int bit = index & 0x3f;            // mod 64
+          return ((bits[i]>>>bit) & 0x01) != 0;
+          // this does a long shift and a bittest (on x86) vs
+          // a long shift, and a long AND, (the test for zero is prob a no-op)
+          // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
+        }
+        */
+
+        /** returns 1 if the bit is set, 0 if not.
+         * The index should be less than the OpenBitSet size
+         */
+        public int GetBit(int index)
+        {
+            int i = index >> 6;                // div 64
+            int bit = index & 0x3f;            // mod 64
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //return ((int)(bits[i]>>>bit)) & 0x01;
+            return ((int)((ulong)(bits[i]) >> bit)) & 0x01;
+        }
+
+        /*
+        public bool get2(int index) {
+          int word = index >> 6;            // div 64
+          int bit = index & 0x0000003f;     // mod 64
+          return (bits[word] << bit) < 0;   // hmmm, this would work if bit order were reversed
+          // we could right shift and check for parity bit, if it was available to us.
+        }
+        */
+
+        /** sets a bit, expanding the set size if necessary */
+        public void Set(long index)
+        {
+            int wordNum = ExpandingWordNum(index);
+            int bit = (int)index & 0x3f;
+            long bitmask = 1L << bit;
+            bits[wordNum] |= bitmask;
+        }
+
+        /** Sets the bit at the specified index.
+         * The index should be less than the OpenBitSet size.
+         */
+        public void FastSet(int index)
+        {
+            int wordNum = index >> 6;      // div 64
+            int bit = index & 0x3f;     // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] |= bitmask;
+        }
+
+        /** Sets the bit at the specified index.
+         * The index should be less than the OpenBitSet size.
+         */
+        public void FastSet(long index)
+        {
+            int wordNum = (int)(index >> 6);
+            int bit = (int)index & 0x3f;
+            long bitmask = 1L << bit;
+            bits[wordNum] |= bitmask;
+        }
+
+        /** Sets a range of bits, expanding the set size if necessary
+         *
+         * @param startIndex lower index
+         * @param endIndex one-past the last bit to set
+         */
+        public void Set(long startIndex, long endIndex)
+        {
+            if (endIndex <= startIndex) return;
+
+            int startWord = (int)(startIndex >> 6);
+
+            // since endIndex is one past the end, this is index of the last
+            // word to be changed.
+            int endWord = ExpandingWordNum(endIndex - 1);
+
+            long startmask = -1L << (int)startIndex;
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+            long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex);  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+            if (startWord == endWord)
+            {
+                bits[startWord] |= (startmask & endmask);
+                return;
+            }
+
+            bits[startWord] |= startmask;
+            //Arrays.fill(bits, startWord+1, endWord, -1L);
+            for (int i = startWord + 1; i < endWord; i++)
+                bits[i] = -1L;
+            bits[endWord] |= endmask;
+        }
+
+        protected int ExpandingWordNum(long index)
+        {
+            int wordNum = (int)(index >> 6);
+            if (wordNum >= wlen)
+            {
+                EnsureCapacity(index + 1);
+                wlen = wordNum + 1;
+            }
+            return wordNum;
+        }
+
+        /** clears a bit.
+         * The index should be less than the OpenBitSet size.
+         */
+        public void FastClear(int index)
+        {
+            int wordNum = index >> 6;
+            int bit = index & 0x03f;
+            long bitmask = 1L << bit;
+            bits[wordNum] &= ~bitmask;
+            // hmmm, it takes one more instruction to clear than it does to set... any
+            // way to work around this?  If there were only 63 bits per word, we could
+            // use a right shift of 10111111...111 in binary to position the 0 in the
+            // correct place (using sign extension).
+            // Could also use Long.rotateRight() or rotateLeft() *if* they were converted
+            // by the JVM into a native instruction.
+            // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
+        }
+
+        /** clears a bit.
+         * The index should be less than the OpenBitSet size.
+         */
+        public void FastClear(long index)
+        {
+            int wordNum = (int)(index >> 6); // div 64
+            int bit = (int)index & 0x3f;     // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] &= ~bitmask;
+        }
+
+        /** clears a bit, allowing access beyond the current set size without changing the size.*/
+        public void Clear(long index)
+        {
+            int wordNum = (int)(index >> 6); // div 64
+            if (wordNum >= wlen) return;
+            int bit = (int)index & 0x3f;     // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] &= ~bitmask;
+        }
+
+        /** Clears a range of bits.  Clearing past the end does not change the size of the set.
+         *
+         * @param startIndex lower index
+         * @param endIndex one-past the last bit to clear
+         */
+        public void Clear(long startIndex, long endIndex)
+        {
+            if (endIndex <= startIndex) return;
+
+            int startWord = (int)(startIndex >> 6);
+            if (startWord >= wlen) return;
+
+            // since endIndex is one past the end, this is index of the last
+            // word to be changed.
+            int endWord = (int)((endIndex - 1) >> 6);
+
+            long startmask = -1L << (int)startIndex;
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+            long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex);  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+            // invert masks since we are clearing
+            startmask = ~startmask;
+            endmask = ~endmask;
+
+            if (startWord == endWord)
+            {
+                bits[startWord] &= (startmask | endmask);
+                return;
+            }
+
+            bits[startWord] &= startmask;
+
+            int middle = Math.Min(wlen, endWord);
+            //Arrays.fill(bits, startWord+1, middle, 0L);
+            for (int i = startWord + 1; i < middle; i++)
+                bits[i] = 0L;
+            if (endWord < wlen)
+            {
+                bits[endWord] &= endmask;
+            }
+        }
+
+        /** Sets a bit and returns the previous value.
+         * The index should be less than the OpenBitSet size.
+         */
+        public bool GetAndSet(int index)
+        {
+            int wordNum = index >> 6;      // div 64
+            int bit = index & 0x3f;     // mod 64
+            long bitmask = 1L << bit;
+            bool val = (bits[wordNum] & bitmask) != 0;
+            bits[wordNum] |= bitmask;
+            return val;
+        }
+
+        /** Sets a bit and returns the previous value.
+         * The index should be less than the OpenBitSet size.
+         */
+        public bool GetAndSet(long index)
+        {
+            int wordNum = (int)(index >> 6);      // div 64
+            int bit = (int)index & 0x3f;     // mod 64
+            long bitmask = 1L << bit;
+            bool val = (bits[wordNum] & bitmask) != 0;
+            bits[wordNum] |= bitmask;
+            return val;
+        }
+
+        /** flips a bit.
+         * The index should be less than the OpenBitSet size.
+         */
+        public void FastFlip(int index)
+        {
+            int wordNum = index >> 6;      // div 64
+            int bit = index & 0x3f;     // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] ^= bitmask;
+        }
+
+        /** flips a bit.
+         * The index should be less than the OpenBitSet size.
+         */
+        public void FastFlip(long index)
+        {
+            int wordNum = (int)(index >> 6);   // div 64
+            int bit = (int)index & 0x3f;       // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] ^= bitmask;
+        }
+
+        /** flips a bit, expanding the set size if necessary */
+        public void Flip(long index)
+        {
+            int wordNum = ExpandingWordNum(index);
+            int bit = (int)index & 0x3f;       // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] ^= bitmask;
+        }
+
+        /** flips a bit and returns the resulting bit value.
+         * The index should be less than the OpenBitSet size.
+         */
+        public bool FlipAndGet(int index)
+        {
+            int wordNum = index >> 6;      // div 64
+            int bit = index & 0x3f;     // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] ^= bitmask;
+            return (bits[wordNum] & bitmask) != 0;
+        }
+
+        /** flips a bit and returns the resulting bit value.
+         * The index should be less than the OpenBitSet size.
+         */
+        public bool FlipAndGet(long index)
+        {
+            int wordNum = (int)(index >> 6);   // div 64
+            int bit = (int)index & 0x3f;       // mod 64
+            long bitmask = 1L << bit;
+            bits[wordNum] ^= bitmask;
+            return (bits[wordNum] & bitmask) != 0;
+        }
+
+        /** Flips a range of bits, expanding the set size if necessary
+         *
+         * @param startIndex lower index
+         * @param endIndex one-past the last bit to flip
+         */
+        public void Flip(long startIndex, long endIndex)
+        {
+            if (endIndex <= startIndex) return;
+            int oldlen = wlen;
+            int startWord = (int)(startIndex >> 6);
+
+            // since endIndex is one past the end, this is index of the last
+            // word to be changed.
+            int endWord = ExpandingWordNum(endIndex - 1);
+
+            /*** Grrr, java shifting wraps around so -1L>>>64 == -1
+             * for that reason, make sure not to use endmask if the bits to flip will
+             * be zero in the last word (redefine endWord to be the last changed...)
+            long startmask = -1L << (startIndex & 0x3f);     // example: 11111...111000
+            long endmask = -1L >>> (64-(endIndex & 0x3f));   // example: 00111...111111
+            ***/
+
+            long startmask = -1L << (int)startIndex;
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+            long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex);  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+            if (startWord == endWord)
+            {
+                bits[startWord] ^= (startmask & endmask);
+                return;
+            }
+
+            bits[startWord] ^= startmask;
+
+            for (int i = startWord + 1; i < endWord; i++)
+            {
+                bits[i] = ~bits[i];
+            }
+
+            bits[endWord] ^= endmask;
+        }
+
+        /*
+        public static int pop(long v0, long v1, long v2, long v3) {
+          // derived from pop_array by setting last four elems to 0.
+          // exchanges one pop() call for 10 elementary operations
+          // saving about 7 instructions... is there a better way?
+            long twosA=v0 & v1;
+            long ones=v0^v1;
+
+            long u2=ones^v2;
+            long twosB =(ones&v2)|(u2&v3);
+            ones=u2^v3;
+
+            long fours=(twosA&twosB);
+            long twos=twosA^twosB;
+
+            return (pop(fours)<<2)
+                   + (pop(twos)<<1)
+                   + pop(ones);
+
+        }
+        */
+
+        /** @return the number of set bits */
+        public long Cardinality()
+        {
+            return BitUtil.pop_array(bits, 0, wlen);
+        }
+
+        /** Returns the popcount or cardinality of the intersection of the two sets.
+          * Neither set is modified.
+          */
+        public static long IntersectionCount(OpenBitSet a, OpenBitSet b)
+        {
+            return BitUtil.pop_intersect(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+        }
+
+        /** Returns the popcount or cardinality of the union of the two sets.
+          * Neither set is modified.
+          */
+        public static long UnionCount(OpenBitSet a, OpenBitSet b)
+        {
+            long tot = BitUtil.pop_union(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+            if (a.wlen < b.wlen)
+            {
+                tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen - a.wlen);
+            }
+            else if (a.wlen > b.wlen)
+            {
+                tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen - b.wlen);
+            }
+            return tot;
+        }
+
+        /** Returns the popcount or cardinality of "a and not b"
+         * or "intersection(a, not(b))".
+         * Neither set is modified.
+         */
+        public static long AndNotCount(OpenBitSet a, OpenBitSet b)
+        {
+            long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+            if (a.wlen > b.wlen)
+            {
+                tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen - b.wlen);
+            }
+            return tot;
+        }
+
+        /** Returns the popcount or cardinality of the exclusive-or of the two sets.
+         * Neither set is modified.
+         */
+        public static long XorCount(OpenBitSet a, OpenBitSet b)
+        {
+            long tot = BitUtil.pop_xor(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+            if (a.wlen < b.wlen)
+            {
+                tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen - a.wlen);
+            }
+            else if (a.wlen > b.wlen)
+            {
+                tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen - b.wlen);
+            }
+            return tot;
+        }
+
+
+        /** Returns the index of the first set bit starting at the index specified.
+         *  -1 is returned if there are no more set bits.
+         */
+        public int NextSetBit(int index)
+        {
+            int i = index >> 6;
+            if (i >= wlen) return -1;
+            int subIndex = index & 0x3f;      // index within the word
+            long word = bits[i] >> subIndex;  // skip all the bits to the right of index
+
+            if (word != 0)
+            {
+                return (i << 6) + subIndex + BitUtil.ntz(word);
+            }
+
+            while (++i < wlen)
+            {
+                word = bits[i];
+                if (word != 0) return (i << 6) + BitUtil.ntz(word);
+            }
+
+            return -1;
+        }
+
+        /** Returns the index of the first set bit starting at the index specified.
+         *  -1 is returned if there are no more set bits.
+         */
+        public long NextSetBit(long index)
+        {
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //int i = (int)(index>>>6);
+            //if (i>=wlen) return -1;
+            //int subIndex = (int)index & 0x3f; // index within the word
+            //long word = bits[i] >>> subIndex;  // skip all the bits to the right of index
+            int i = (int)((ulong)index >> 6);
+            if (i >= wlen) return -1;
+            int subIndex = (int)index & 0x3f; // index within the word
+            long word = (long)((ulong)(bits[i]) >> subIndex);  // skip all the bits to the right of index
+
+            if (word != 0)
+            {
+                return (((long)i) << 6) + (subIndex + BitUtil.ntz(word));
+            }
+
+            while (++i < wlen)
+            {
+                word = bits[i];
+                if (word != 0) return (((long)i) << 6) + BitUtil.ntz(word);
+            }
+
+            return -1;
+        }
+
+
+
+
+        public virtual object Clone()
+        {
+            //try
+            //{
+            //    OpenBitSet obs = (OpenBitSet)super.clone();
+            //    obs.bits = (long[])obs.bits.clone();  // hopefully an array clone is as fast(er) than arraycopy
+            //    return obs;
+            //}
+            //catch (CloneNotSupportedException e)
+            //{
+            //    throw new RuntimeException(e);
+            //}
+            return new OpenBitSet((long[])bits.Clone(), wlen);
+        }
+
+        /** this = this AND other */
+        public void Intersect(OpenBitSet other)
+        {
+            int newLen = Math.Min(this.wlen, other.wlen);
+            long[] thisArr = this.bits;
+            long[] otherArr = other.bits;
+            // testing against zero can be more efficient
+            int pos = newLen;
+            while (--pos >= 0)
+            {
+                thisArr[pos] &= otherArr[pos];
+            }
+            if (this.wlen > newLen)
+            {
+                // fill zeros from the new shorter length to the old length
+                //Arrays.fill(bits, newLen, this.wlen, 0);
+                for (int i = newLen; i < this.wlen; i++)
+                    bits[i] = 0L;
+            }
+            this.wlen = newLen;
+        }
+
+        /** this = this OR other */
+        public void Union(OpenBitSet other)
+        {
+            int newLen = Math.Max(wlen, other.wlen);
+            EnsureCapacityWords(newLen);
+
+            long[] thisArr = this.bits;
+            long[] otherArr = other.bits;
+            int pos = Math.Min(wlen, other.wlen);
+            while (--pos >= 0)
+            {
+                thisArr[pos] |= otherArr[pos];
+            }
+            if (this.wlen < newLen)
+            {
+                //System.Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+                Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+            }
+            this.wlen = newLen;
+        }
+
+
+        /** Remove all elements set in other. this = this AND_NOT other */
+        public void Remove(OpenBitSet other)
+        {
+            int idx = Math.Min(wlen, other.wlen);
+            long[] thisArr = this.bits;
+            long[] otherArr = other.bits;
+            while (--idx >= 0)
+            {
+                thisArr[idx] &= ~otherArr[idx];
+            }
+        }
+
+        /** this = this XOR other */
+        public void Xor(OpenBitSet other)
+        {
+            int newLen = Math.Max(wlen, other.wlen);
+            EnsureCapacityWords(newLen);
+
+            long[] thisArr = this.bits;
+            long[] otherArr = other.bits;
+            int pos = Math.Min(wlen, other.wlen);
+            while (--pos >= 0)
+            {
+                thisArr[pos] ^= otherArr[pos];
+            }
+            if (this.wlen < newLen)
+            {
+                //System.Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+                Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+            }
+            this.wlen = newLen;
+        }
+
+
+        // some BitSet compatability methods
+
+        //** see {@link intersect} */
+        public void And(OpenBitSet other)
+        {
+            Intersect(other);
+        }
+
+        //** see {@link union} */
+        public void Or(OpenBitSet other)
+        {
+            Union(other);
+        }
+
+        //** see {@link andNot} */
+        public void AndNot(OpenBitSet other)
+        {
+            Remove(other);
+        }
+
+        /** returns true if the sets have any elements in common */
+        public bool Intersects(OpenBitSet other)
+        {
+            int pos = Math.Min(this.wlen, other.wlen);
+            long[] thisArr = this.bits;
+            long[] otherArr = other.bits;
+            while (--pos >= 0)
+            {
+                if ((thisArr[pos] & otherArr[pos]) != 0) return true;
+            }
+            return false;
+        }
+
+
+
+        /** Expand the long[] with the size given as a number of words (64 bit longs).
+         * getNumWords() is unchanged by this call.
+         */
+        public void EnsureCapacityWords(int numWords)
+        {
+            if (bits.Length < numWords)
+            {
+                long[] newBits = new long[numWords];
+                //System.Array.Copy(bits, 0, newBits, 0, wlen);
+                Array.Copy(bits, 0, newBits, 0, wlen);
+                bits = newBits;
+            }
+        }
+
+        /** Ensure that the long[] is big enough to hold numBits, expanding it if necessary.
+         * getNumWords() is unchanged by this call.
+         */
+        public void EnsureCapacity(long numBits)
+        {
+            EnsureCapacityWords(bits2words(numBits));
+        }
+
+        /** Lowers numWords, the number of words in use,
+         * by checking for trailing zero words.
+         */
+        public void trimTrailingZeros()
+        {
+            int idx = wlen - 1;
+            while (idx >= 0 && bits[idx] == 0) idx--;
+            wlen = idx + 1;
+        }
+
+        /** returns the number of 64 bit words it would take to hold numBits */
+        public static int bits2words(long numBits)
+        {
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //return (int)(((numBits-1)>>>6)+1);
+            return (int)(((ulong)(numBits - 1) >> 6) + 1);
+        }
+
+
+        /** returns true if both sets have the same bits set */
+        public override bool Equals(object o)
+        {
+            if (this == o) return true;
+            if (!(o is OpenBitSet)) return false;
+            OpenBitSet a;
+            OpenBitSet b = (OpenBitSet)o;
+            // make a the larger set.
+            if (b.wlen > this.wlen)
+            {
+                a = b; b = this;
+            }
+            else
+            {
+                a = this;
+            }
+
+            // check for any set bits out of the range of b
+            for (int i = a.wlen - 1; i >= b.wlen; i--)
+            {
+                if (a.bits[i] != 0) return false;
+            }
+
+            for (int i = b.wlen - 1; i >= 0; i--)
+            {
+                if (a.bits[i] != b.bits[i]) return false;
+            }
+
+            return true;
+        }
+
+
+        public override int GetHashCode()
+        {
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //  long h = 0x98761234;  // something non-zero for length==0
+            //  for (int i = bits.length; --i>=0;) {
+            //  h ^= bits[i];
+            //  h = (h << 1) | (h >>> 63); // rotate left
+            //return (int)((h>>32) ^ h);  // fold leftmost bits into right
+            long h = 0x98761234;  // something non-zero for length==0
+            for (int i = bits.Length; --i >= 0; )
+            {
+                h ^= bits[i];
+                h = (h << 1) | (long)((ulong)h >> 63); // rotate left
+            }
+            return (int)((h >> 32) ^ h);  // fold leftmost bits into right
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetDISI.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/OpenBitSetDISI.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetDISI.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetDISI.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+    public class OpenBitSetDISI : OpenBitSet
+    {
+        /** Construct an OpenBitSetDISI with its bits set
+         * from the doc ids of the given DocIdSetIterator.
+         * Also give a maximum size one larger than the largest doc id for which a
+         * bit may ever be set on this OpenBitSetDISI.
+         */
+        public OpenBitSetDISI(DocIdSetIterator disi, int maxSize)
+            : base(maxSize)
+        {
+            InPlaceOr(disi);
+        }
+
+        /** Construct an OpenBitSetDISI with no bits set, and a given maximum size
+         * one larger than the largest doc id for which a bit may ever be set
+         * on this OpenBitSetDISI.
+         */
+        public OpenBitSetDISI(int maxSize)
+            : base(maxSize)
+        {
+        }
+
+        /**
+         * Perform an inplace OR with the doc ids from a given DocIdSetIterator,
+         * setting the bit for each such doc id.
+         * These doc ids should be smaller than the maximum size passed to the
+         * constructor.
+         */
+        public void InPlaceOr(DocIdSetIterator disi)
+        {
+            while (disi.Next() && (disi.Doc() < Size()))
+            {
+                FastSet(disi.Doc());
+            }
+        }
+
+        /**
+         * Perform an inplace AND with the doc ids from a given DocIdSetIterator,
+         * leaving only the bits set for which the doc ids are in common.
+         * These doc ids should be smaller than the maximum size passed to the
+         * constructor.
+         */
+        public void InPlaceAnd(DocIdSetIterator disi)
+        {
+            int index = NextSetBit(0);
+            int lastNotCleared = -1;
+            while ((index != -1) && disi.SkipTo(index))
+            {
+                while ((index != -1) && (index < disi.Doc()))
+                {
+                    FastClear(index);
+                    index = NextSetBit(index + 1);
+                }
+                if (index == disi.Doc())
+                {
+                    lastNotCleared = index;
+                    index++;
+                }
+                System.Diagnostics.Debug.Assert((index == -1) || (index > disi.Doc()));
+            }
+            Clear(lastNotCleared + 1, Size());
+        }
+
+        /**
+         * Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
+         * clearing all the bits for each such doc id.
+         * These doc ids should be smaller than the maximum size passed to the
+         * constructor.
+         */
+        public void InPlaceNot(DocIdSetIterator disi)
+        {
+            while (disi.Next() && (disi.Doc() < Size()))
+            {
+                FastClear(disi.Doc());
+            }
+        }
+
+        /**
+         * Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
+         * flipping all the bits for each such doc id.
+         * These doc ids should be smaller than the maximum size passed to the
+         * constructor.
+         */
+        public void InPlaceXor(DocIdSetIterator disi)
+        {
+            while (disi.Next() && (disi.Doc() < Size()))
+            {
+                FastFlip(disi.Doc());
+            }
+        }
+    }
+}

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetIterator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/OpenBitSetIterator.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetIterator.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetIterator.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+
+    /** An iterator to iterate over set bits in an OpenBitSet.
+     * This is faster than nextSetBit() for iterating over the complete set of bits,
+     * especially when the density of the bits set is high.
+     *
+     * @version $Id$
+     */
+    public class OpenBitSetIterator : DocIdSetIterator
+    {
+
+        // The General Idea: instead of having an array per byte that has
+        // the offsets of the next set bit, that array could be
+        // packed inside a 32 bit integer (8 4 bit numbers).  That
+        // should be faster than accessing an array for each index, and
+        // the total array size is kept smaller (256*sizeof(int))=1K
+            //{DOUG-2.4.0: numbers too large for int trying to be assigned - could either make whole array uint or do unchecked casts
+        //protected static readonly int[] bitlist = {
+    //0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x
 854,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321
+        protected static readonly uint[] bitlist = {
+    0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x85
 4,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321
+  };
+        /***** the python code that generated bitlist
+        def bits2int(val):
+        arr=0
+        for shift in range(8,0,-1):
+          if val & 0x80:
+            arr = (arr << 4) | shift
+          val = val << 1
+        return arr
+
+        def int_table():
+          tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
+          return ','.join(tbl)
+        ******/
+
+        // hmmm, what about an iterator that finds zeros though,
+        // or a reverse iterator... should they be separate classes
+        // for efficiency, or have a common root interface?  (or
+        // maybe both?  could ask for a SetBitsIterator, etc...
+
+
+        private readonly long[] arr;
+        private readonly int words;
+        private int i = -1;
+        private long word;
+        private int wordShift;
+        private int indexArray;
+        private int curDocId;
+
+        public OpenBitSetIterator(OpenBitSet obs)
+            : this(obs.GetBits(), obs.GetNumWords())
+        {
+        }
+
+        public OpenBitSetIterator(long[] bits, int numWords)
+        {
+            arr = bits;
+            words = numWords;
+        }
+
+        // 64 bit shifts
+        private void shift()
+        {
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+            //if ((int)word ==0) {wordShift +=32; word = word >>>32; }
+            //if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; }
+            //if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; }
+            //indexArray = bitlist[(int)word & 0xff];
+            if ((int)word == 0) { wordShift += 32; word = (long)((ulong)word >> 32); }
+            if ((word & 0x0000FFFF) == 0) { wordShift += 16; word = (long)((ulong)word >> 16); }
+            if ((word & 0x000000FF) == 0) { wordShift += 8; word = (long)((ulong)word >> 8); }
+            indexArray = (int)bitlist[(int)word & 0xff];
+        }
+
+        /***** alternate shift implementations
+        // 32 bit shifts, but a long shift needed at the end
+        private void shift2() {
+          int y = (int)word;
+          if (y==0) {wordShift +=32; y = (int)(word >>>32); }
+          if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
+          if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
+          indexArray = bitlist[y & 0xff];
+          word >>>= (wordShift +1);
+        }
+
+        private void shift3() {
+          int lower = (int)word;
+          int lowByte = lower & 0xff;
+          if (lowByte != 0) {
+            indexArray=bitlist[lowByte];
+            return;
+          }
+          shift();
+        }
+        ******/
+
+        public override bool Next() {
+    if (indexArray==0) {
+      if (word!=0) {
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+        //word >>>= 8;
+          word = (long)((ulong)word >> 8);
+        wordShift += 8;
+      }
+
+      while (word==0) {
+        if (++i >= words) {
+          curDocId = -1;
+          return false;
+        }
+        word = arr[i];
+        wordShift =-1;  // loop invariant code motion should move this
+      }
+
+      // after the first time, should I go with a linear search, or
+      // stick with the binary search in shift?
+      shift();
+    }
+
+    int bitIndex = (indexArray & 0x0f) + wordShift;
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+    //indexArray >>>= 4;
+    indexArray = (int)((uint)indexArray >> 4);
+
+    // should i<<6 be cached as a separate variable?
+    // it would only save one cycle in the best circumstances.
+    curDocId = (i<<6) + bitIndex;
+    return true;
+  }
+
+        public override bool SkipTo(int target) {
+    indexArray=0;
+    i = target >> 6;
+    if (i>=words) {
+      word =0; // setup so next() will also return -1
+      curDocId = -1;
+      return false;
+    }
+    wordShift = target & 0x3f;
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+    //word = arr[i] >>> wordShift;
+    word = (long)((ulong)(arr[i]) >> wordShift);
+    if (word !=0) {
+      wordShift--; // compensate for 1 based arrIndex
+    } else {
+      while (word ==0) {
+        if (++i >= words) {
+          curDocId = -1;
+          return false;
+        }
+        word = arr[i];
+      }
+      wordShift =-1;
+    }
+
+    shift();
+
+    int bitIndex = (indexArray & 0x0f) + wordShift;
+            //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+    //indexArray >>>= 4;
+    indexArray = (int)((uint)indexArray >> 4);
+    // should i<<6 be cached as a separate variable?
+    // it would only save one cycle in the best circumstances.
+    curDocId = (i<<6) + bitIndex;
+    return true;
+  }
+
+        public override int Doc()
+        {
+            return this.curDocId;
+        }
+    }
+}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/Parameter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/Parameter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/Parameter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/Parameter.cs Wed Jul 29 18:04:12 2009
@@ -22,7 +22,7 @@
 	
 	/// <summary> A serializable Enum class.</summary>
 	[Serializable]
-    public abstract class Parameter : System.Runtime.Serialization.IObjectReference
+	public abstract class Parameter
 	{
 		internal static System.Collections.IDictionary allParameters = new System.Collections.Hashtable();
 		
@@ -61,22 +61,15 @@
 		/// </summary>
 		/// <returns> a reference to Parameter as resolved in the local VM
 		/// </returns>
-		/// <throws>  ObjectStreamException </throws>
-		protected internal virtual System.Object ReadResolve()
+		/// <throws>  objectStreamException </throws>
+		protected internal virtual object ReadResolve()
 		{
-			System.Object par = allParameters[MakeKey(name)];
+			object par = allParameters[MakeKey(name)];
 			
 			if (par == null)
 				throw new System.IO.IOException("Unknown parameter value: " + name);
 			
 			return par;
 		}
-        
-        // "ReadResolve"s equivalent for .NET
-        // https://issues.apache.org/jira/browse/LUCENENET-170
-        public Object GetRealObject(System.Runtime.Serialization.StreamingContext context)
-        {
-            return ReadResolve();
-        }
 	}
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/PriorityQueue.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/PriorityQueue.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/PriorityQueue.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/PriorityQueue.cs Wed Jul 29 18:04:12 2009
@@ -28,15 +28,15 @@
 	{
 		private int size;
 		private int maxSize;
-		protected internal System.Object[] heap;
+		protected internal object[] heap;
 		
 		/// <summary>Determines the ordering of objects in this priority queue.  Subclasses
 		/// must define this one method. 
 		/// </summary>
-		public abstract bool LessThan(System.Object a, System.Object b);
+		public abstract bool LessThan(object a, object b);
 		
 		/// <summary>Subclass constructors must call this. </summary>
-		protected internal void  Initialize(int maxSize)
+		protected  void  Initialize(int maxSize)
 		{
 			size = 0;
 			int heapSize;
@@ -45,15 +45,15 @@
 				heapSize = 2;
 			else
 				heapSize = maxSize + 1;
-			heap = new System.Object[heapSize];
+			heap = new object[heapSize];
 			this.maxSize = maxSize;
 		}
 		
-		/// <summary> Adds an Object to a PriorityQueue in log(size) time.
+		/// <summary> Adds an object to a PriorityQueue in log(size) time.
 		/// If one tries to add more objects than maxSize from initialize
 		/// a RuntimeException (ArrayIndexOutOfBound) is thrown.
 		/// </summary>
-		public void  Put(System.Object element)
+		public void  Put(object element)
 		{
 			size++;
 			heap[size] = element;
@@ -67,7 +67,7 @@
 		/// </param>
 		/// <returns> true if element is added, false otherwise.
 		/// </returns>
-		public virtual bool Insert(System.Object element)
+		public virtual bool Insert(object element)
 		{
 			return InsertWithOverflow(element) != element;
 		}
@@ -81,7 +81,7 @@
 		/// heap and now has been replaced by a larger one, or null
 		/// if the queue wasn't yet full with maxSize elements.
 		/// </summary>
-		public virtual System.Object InsertWithOverflow(System.Object element)
+		public virtual object InsertWithOverflow(object element)
 		{
 			if (size < maxSize)
 			{
@@ -90,7 +90,7 @@
 			}
 			else if (size > 0 && !LessThan(element, heap[1]))
 			{
-				System.Object ret = heap[1];
+				object ret = heap[1];
 				heap[1] = element;
 				AdjustTop();
 				return ret;
@@ -102,7 +102,7 @@
 		}
 		
 		/// <summary>Returns the least element of the PriorityQueue in constant time. </summary>
-		public System.Object Top()
+		public object Top()
 		{
 			// We don't need to check size here: if maxSize is 0,
 			// then heap is length 2 array with both entries null.
@@ -113,11 +113,11 @@
 		/// <summary>Removes and returns the least element of the PriorityQueue in log(size)
 		/// time. 
 		/// </summary>
-		public System.Object Pop()
+		public object Pop()
 		{
 			if (size > 0)
 			{
-				System.Object result = heap[1]; // save first value
+				object result = heap[1]; // save first value
 				heap[1] = heap[size]; // move last to first
 				heap[size] = null; // permit GC of objects
 				size--;
@@ -128,7 +128,7 @@
 				return null;
 		}
 		
-		/// <summary>Should be called when the Object at top changes values.  Still log(n)
+		/// <summary>Should be called when the object at top changes values.  Still log(n)
 		/// worst case, but it's at least twice as fast to <pre>
 		/// { pq.top().change(); pq.adjustTop(); }
 		/// </pre> instead of <pre>
@@ -157,7 +157,7 @@
 		private void  UpHeap()
 		{
 			int i = size;
-			System.Object node = heap[i]; // save bottom node
+			object node = heap[i]; // save bottom node
 			int j = SupportClass.Number.URShift(i, 1);
 			while (j > 0 && LessThan(node, heap[j]))
 			{
@@ -171,7 +171,7 @@
 		private void  DownHeap()
 		{
 			int i = 1;
-			System.Object node = heap[i]; // save top node
+			object node = heap[i]; // save top node
 			int j = i << 1; // find smaller child
 			int k = j + 1;
 			if (k <= size && LessThan(heap[k], heap[j]))

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SmallFloat.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/SmallFloat.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SmallFloat.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SmallFloat.cs Wed Jul 29 18:04:12 2009
@@ -19,15 +19,9 @@
 
 namespace Lucene.Net.Util
 {
-	
-	
-	/// <summary>Floating point numbers smaller than 32 bits.
-	/// 
+	/// <summary>
+    /// Floating point numbers smaller than 32 bits.
 	/// </summary>
-	/// <author>  yonik
-	/// </author>
-	/// <version>  $Id$
-	/// </version>
 	public class SmallFloat
 	{
 		

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SortedVIntList.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/SortedVIntList.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SortedVIntList.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SortedVIntList.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,265 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using BitArray = System.Collections.BitArray;
+using DocIdSet = Lucene.Net.Search.DocIdSet;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+    /**
+     *  Store and iterate sorted integers in compressed form in RAM.
+     *  <br>The code for compressing the differences between ascending integers was
+     *  borrowed from {@link org.apache.lucene.store.IndexInput} and
+     *  {@link org.apache.lucene.store.IndexOutput}.
+     */
+    public class SortedVIntList : DocIdSet
+    {
+
+        /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
+        * a SortedVIntList representing the index numbers of the set bits
+        * will be smaller than that BitSet.
+        */
+        protected internal static readonly int BITS2VINTLIST_SIZE = 8;
+
+        private int size;
+        private byte[] bytes;
+        private int lastBytePos;
+
+        /**
+         *  Create a SortedVIntList from all elements of an array of integers.
+         *
+         * @param  sortedInts  A sorted array of non negative integers.
+         */
+        public SortedVIntList(int[] sortedInts)
+            :
+          this(sortedInts, sortedInts.Length)
+        {
+        }
+
+        /**
+         * Create a SortedVIntList from an array of integers.
+         * @param  sortedInts  An array of sorted non negative integers.
+         * @param  inputSize   The number of integers to be used from the array.
+         */
+        public SortedVIntList(int[] sortedInts, int inputSize)
+        {
+            SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+            for (int i = 0; i < inputSize; i++)
+            {
+                builder.AddInt(sortedInts[i]);
+            }
+            builder.Done();
+        }
+
+        /**
+         * Create a SortedVIntList from a BitSet.
+         * @param  bits  A bit set representing a set of integers.
+         */
+        public SortedVIntList(SupportClass.CollectionsSupport.BitSet bits)
+        {
+            SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+            int nextInt = bits.NextSetBit(0);
+            while (nextInt != -1)
+            {
+                builder.AddInt(nextInt);
+                nextInt = bits.NextSetBit(nextInt + 1);
+            }
+            builder.Done();
+        }
+
+        /**
+         * Create a SortedVIntList from an OpenBitSet.
+         * @param  bits  A bit set representing a set of integers.
+         */
+        public SortedVIntList(OpenBitSet bits)
+        {
+            SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+            int nextInt = bits.NextSetBit(0);
+            while (nextInt != -1)
+            {
+                builder.AddInt(nextInt);
+                nextInt = bits.NextSetBit(nextInt + 1);
+            }
+            builder.Done();
+        }
+
+        /**
+         * Create a SortedVIntList.
+         * @param  docIdSetIterator  An iterator providing document numbers as a set of integers.
+         *                  This DocIdSetIterator is iterated completely when this constructor
+         *                  is called and it must provide the integers in non
+         *                  decreasing order.
+         */
+        public SortedVIntList(DocIdSetIterator docIdSetIterator)
+        {
+            SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+            while (docIdSetIterator.Next())
+            {
+                builder.AddInt(docIdSetIterator.Doc());
+            }
+            builder.Done();
+        }
+
+
+
+        private void InitBytes()
+        {
+            size = 0;
+            bytes = new byte[128]; // initial byte size
+            lastBytePos = 0;
+        }
+
+        private void ResizeBytes(int newSize)
+        {
+            if (newSize != bytes.Length)
+            {
+                byte[] newBytes = new byte[newSize];
+                System.Array.Copy(bytes, 0, newBytes, 0, lastBytePos);
+                bytes = newBytes;
+            }
+        }
+
+        private static readonly int VB1 = 0x7F;
+        private static readonly int BIT_SHIFT = 7;
+        private readonly int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
+
+        /**
+         * @return    The total number of sorted integers.
+         */
+        public int Size()
+        {
+            return size;
+        }
+
+        /**
+         * @return The size of the byte array storing the compressed sorted integers.
+         */
+        public int GetByteSize()
+        {
+            return bytes.Length;
+        }
+
+        /**
+         * @return    An iterator over the sorted integers.
+         */
+        public override DocIdSetIterator Iterator()
+        {
+            return new AnonymousDocIdSetIterator(this);
+        }
+
+        private class AnonymousDocIdSetIterator : DocIdSetIterator
+        {
+            private SortedVIntList enclosingInstance;
+            internal AnonymousDocIdSetIterator(SortedVIntList enclosingInstance)
+            {
+                this.enclosingInstance = enclosingInstance;
+            }
+
+            int bytePos = 0;
+            int lastInt = 0;
+
+            private void Advance()
+            {
+                // See org.apache.lucene.store.IndexInput.readVInt()
+                byte b = enclosingInstance.bytes[bytePos++];
+                lastInt += b & VB1;
+                for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT)
+                {
+                    b = enclosingInstance.bytes[bytePos++];
+                    lastInt += (b & VB1) << s;
+                }
+            }
+
+            public override int Doc() { return lastInt; }
+
+            public override bool Next()
+            {
+                if (bytePos >= enclosingInstance.lastBytePos)
+                {
+                    return false;
+                }
+                else
+                {
+                    Advance();
+                    return true;
+                }
+            }
+
+            public override bool SkipTo(int docNr)
+            {
+                while (bytePos < enclosingInstance.lastBytePos)
+                {
+                    Advance();
+                    if (lastInt >= docNr)
+                    { // No skipping to docNr available.
+                        return true;
+                    }
+                }
+                return false;
+            }
+        }
+
+        //private class SortedVIntListBuilder {
+        internal class SortedVIntListBuilder
+        {
+            private int lastInt = 0;
+
+            private SortedVIntList enclosingInstance;
+            internal SortedVIntListBuilder(SortedVIntList enclosingInstance)
+            {
+                this.enclosingInstance = enclosingInstance;
+                enclosingInstance.InitBytes();
+                lastInt = 0;
+            }
+
+            internal void AddInt(int nextInt)
+            {
+                int diff = nextInt - lastInt;
+                if (diff < 0)
+                {
+                    throw new System.ArgumentException(
+                        "Input not sorted or first element negative.");
+                }
+
+                if ((enclosingInstance.lastBytePos + enclosingInstance.MAX_BYTES_PER_INT) > enclosingInstance.bytes.Length)
+                {
+                    // biggest possible int does not fit
+                    enclosingInstance.ResizeBytes((enclosingInstance.bytes.Length * 2) + enclosingInstance.MAX_BYTES_PER_INT);
+                }
+
+                // See org.apache.lucene.store.IndexOutput.writeVInt()
+                while ((diff & ~VB1) != 0)
+                { // The high bit of the next byte needs to be set.
+                    enclosingInstance.bytes[enclosingInstance.lastBytePos++] = (byte)((diff & VB1) | ~VB1);
+                    //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+                    //diff >>>= BIT_SHIFT;
+                    diff = (int)((uint)diff >> BIT_SHIFT);
+                }
+                enclosingInstance.bytes[enclosingInstance.lastBytePos++] = (byte)diff; // Last byte, high bit not set.
+                enclosingInstance.size++;
+                lastInt = nextInt;
+            }
+
+            internal void Done()
+            {
+                enclosingInstance.ResizeBytes(enclosingInstance.lastBytePos);
+            }
+        }
+
+    }
+}
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/StringHelper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/StringHelper.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/StringHelper.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/StringHelper.cs Wed Jul 29 18:04:12 2009
@@ -15,19 +15,31 @@
  * limitations under the License.
  */
 
-using System;
-
 namespace Lucene.Net.Util
 {
-	
-	
-	/// <summary> Methods for manipulating strings.
-	/// 
-	/// $Id: StringHelper.java 472959 2006-11-09 16:21:50Z yonik $
+	/// <summary>
+    /// Methods for manipulating strings.
 	/// </summary>
 	public abstract class StringHelper
 	{
-		
+        /// <summary>
+        /// Compares two byte arrays, starting at their 0th elements, and returns the 
+        /// number of common elements before encountering a difference.
+        /// </summary>
+        /// <param name="bytes1"></param>
+        /// <param name="len1"></param>
+        /// <param name="bytes2"></param>
+        /// <param name="len2"></param>
+        /// <returns>the number of common elements</returns>
+        public static int bytesDifference(byte[] bytes1, int len1, byte[] bytes2, int len2)
+        {
+            int len = len1 < len2 ? len1 : len2;
+            for (int i = 0; i < len; i++)
+                if (bytes1[i] != bytes2[i])
+                    return i;
+            return len;
+        }
+
 		/// <summary> Compares two strings, character by character, and returns the
 		/// first position where the two strings differ from one another.
 		/// 
@@ -38,7 +50,7 @@
 		/// </param>
 		/// <returns> The first position where the two strings differ.
 		/// </returns>
-		public static int StringDifference(System.String s1, System.String s2)
+		public static int StringDifference(string s1, string s2)
 		{
 			int len1 = s1.Length;
 			int len2 = s2.Length;
@@ -53,7 +65,6 @@
 			return len;
 		}
 		
-		
 		private StringHelper()
 		{
 		}

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/UnicodeUtil.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/UnicodeUtil.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/UnicodeUtil.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/UnicodeUtil.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,498 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Some of this code came from the excellent Unicode
+ * conversion examples from:
+ *
+ *   http://www.unicode.org/Public/PROGRAMS/CVTUTF
+ *
+ * Full Copyright for that code follows:
+ */
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+namespace Lucene.Net.Util
+{
+    /**
+     * Class to encode java's UTF16 char[] into UTF8 byte[]
+     * without always allocating a new byte[] as
+     * String.getBytes("UTF-8") does.
+     *
+     * <p><b>WARNING</b>: This API is a new and experimental and
+     * may suddenly change. </p>
+     */
+    sealed public class UnicodeUtil
+    {
+
+        public static readonly int UNI_SUR_HIGH_START = 0xD800;
+        public static readonly int UNI_SUR_HIGH_END = 0xDBFF;
+        public static readonly int UNI_SUR_LOW_START = 0xDC00;
+        public static readonly int UNI_SUR_LOW_END = 0xDFFF;
+        public static readonly int UNI_REPLACEMENT_CHAR = 0xFFFD;
+
+        private static readonly long UNI_MAX_BMP = 0x0000FFFF;
+
+        private static readonly int HALF_BASE = 0x0010000;
+        private static readonly long HALF_SHIFT = 10;
+        private static readonly long HALF_MASK = 0x3FFL;
+
+        sealed public class UTF8Result
+        {
+            public byte[] result = new byte[10];
+            public int length;
+
+            public void setLength(int newLength)
+            {
+                if (result.Length < newLength)
+                {
+                    byte[] newArray = new byte[(int)(1.5 * newLength)];
+                    System.Array.Copy(result, 0, newArray, 0, length);
+                    result = newArray;
+                }
+                length = newLength;
+            }
+        }
+
+        sealed public class UTF16Result
+        {
+            public char[] result = new char[10];
+            public int[] offsets = new int[10];
+            public int length;
+
+            public void setLength(int newLength)
+            {
+                if (result.Length < newLength)
+                {
+                    char[] newArray = new char[(int)(1.5 * newLength)];
+                    System.Array.Copy(result, 0, newArray, 0, length);
+                    result = newArray;
+                }
+                length = newLength;
+            }
+
+            public void copyText(UTF16Result other)
+            {
+                setLength(other.length);
+                System.Array.Copy(other.result, 0, result, 0, length);
+            }
+        }
+
+        /** Encode characters from a char[] source, starting at
+         *  offset and stopping when the character 0xffff is seen.
+         *  Returns the number of bytes written to bytesOut. */
+        public static void UTF16toUTF8(/* in */ char[] source, /* in */ int offset, UTF8Result result)
+        {
+
+            int upto = 0;
+            int i = offset;
+            byte[] out_Renamed = result.result;
+
+            while (true)
+            {
+
+                int code = (int)source[i++];
+
+                if (upto + 4 > out_Renamed.Length)
+                {
+                    byte[] newOut = new byte[2 * out_Renamed.Length];
+                    System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+                    System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
+                    result.result = out_Renamed = newOut;
+                }
+                if (code < 0x80)
+                    out_Renamed[upto++] = (byte)code;
+                else if (code < 0x800)
+                {
+                    out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
+                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else if (code < 0xD800 || code > 0xDFFF)
+                {
+                    if (code == 0xffff)
+                        // END
+                        break;
+                    out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
+                    out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else
+                {
+                    // surrogate pair
+                    // confirm valid high surrogate
+                    if (code < 0xDC00 && source[i] != 0xffff)
+                    {
+                        int utf32 = (int)source[i];
+                        // confirm valid low surrogate and write pair
+                        if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+                        {
+                            utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+                            i++;
+                            out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
+                            out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
+                            out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+                            out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
+                            continue;
+                        }
+                    }
+                    // replace unpaired surrogate or out_Renamed-of-order low surrogate
+                    // with substitution character
+                    out_Renamed[upto++] = (byte)0xEF;
+                    out_Renamed[upto++] = (byte)0xBF;
+                    out_Renamed[upto++] = (byte)0xBD;
+                }
+            }
+            //System.Diagnostics.Debug.Assert(matches(source, offset, i-offset-1, out_Renamed, upto);
+            result.length = upto;
+        }
+
+        /** Encode characters from a char[] source, starting at
+         *  offset for length chars.  Returns the number of bytes
+         *  written to bytesOut. */
+        public static void UTF16toUTF8(/* in */ char[] source, /* in */ int offset, /* in */ int length, UTF8Result result)
+        {
+
+            int upto = 0;
+            int i = offset;
+            int end = offset + length;
+            byte[] out_Renamed = result.result;
+
+            while (i < end)
+            {
+
+                int code = (int)source[i++];
+
+                if (upto + 4 > out_Renamed.Length)
+                {
+                    byte[] newOut = new byte[2 * out_Renamed.Length];
+                    System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+                    System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
+                    result.result = out_Renamed = newOut;
+                }
+                if (code < 0x80)
+                    out_Renamed[upto++] = (byte)code;
+                else if (code < 0x800)
+                {
+                    out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
+                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else if (code < 0xD800 || code > 0xDFFF)
+                {
+                    out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
+                    out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else
+                {
+                    // surrogate pair
+                    // confirm valid high surrogate
+                    if (code < 0xDC00 && i < end && source[i] != 0xffff)
+                    {
+                        int utf32 = (int)source[i];
+                        // confirm valid low surrogate and write pair
+                        if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+                        {
+                            utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+                            i++;
+                            out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
+                            out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
+                            out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+                            out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
+                            continue;
+                        }
+                    }
+                    // replace unpaired surrogate or out_Renamed-of-order low surrogate
+                    // with substitution character
+                    out_Renamed[upto++] = (byte)0xEF;
+                    out_Renamed[upto++] = (byte)0xBF;
+                    out_Renamed[upto++] = (byte)0xBD;
+                }
+            }
+            //System.Diagnostics.Debug.Assert(matches(source, offset, length, out_Renamed, upto);
+            result.length = upto;
+        }
+
+        /** Encode characters from this String, starting at offset
+         *  for length characters.  Returns the number of bytes
+         *  written to bytesOut. */
+        public static void UTF16toUTF8(/* in */ string s, /* in */ int offset, /* in */ int length, UTF8Result result)
+        {
+            int end = offset + length;
+
+            byte[] out_Renamed = result.result;
+
+            int upto = 0;
+            for (int i = offset; i < end; i++)
+            {
+                int code = (int)s[i];
+
+                if (upto + 4 > out_Renamed.Length)
+                {
+                    byte[] newOut = new byte[2 * out_Renamed.Length];
+                    System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+                    System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
+                    result.result = out_Renamed = newOut;
+                }
+                if (code < 0x80)
+                    out_Renamed[upto++] = (byte)code;
+                else if (code < 0x800)
+                {
+                    out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
+                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else if (code < 0xD800 || code > 0xDFFF)
+                {
+                    out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
+                    out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else
+                {
+                    // surrogate pair
+                    // confirm valid high surrogate
+                    if (code < 0xDC00 && (i < end - 1))
+                    {
+                        int utf32 = (int)s[i + 1];
+                        // confirm valid low surrogate and write pair
+                        if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+                        {
+                            utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+                            i++;
+                            out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
+                            out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
+                            out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+                            out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
+                            continue;
+                        }
+                    }
+                    // replace unpaired surrogate or out_Renamed-of-order low surrogate
+                    // with substitution character
+                    out_Renamed[upto++] = (byte)0xEF;
+                    out_Renamed[upto++] = (byte)0xBF;
+                    out_Renamed[upto++] = (byte)0xBD;
+                }
+            }
+            //System.Diagnostics.Debug.Assert(matches(s, offset, length, out_Renamed, upto);
+            result.length = upto;
+        }
+
+        /** Convert UTF8 bytes into UTF16 characters.  If offset
+         *  is non-zero, conversion starts at that starting point
+         *  in utf8, re-using the results from the previous call
+         *  up until offset. */
+        public static void UTF8toUTF16(/* in */ byte[] utf8, /* in */ int offset, /* in */ int length, /* in */ UTF16Result result)
+        {
+
+            int end = offset + length;
+            char[] out_Renamed = result.result;
+            if (result.offsets.Length <= end)
+            {
+                int[] newOffsets = new int[2 * end];
+                System.Array.Copy(result.offsets, 0, newOffsets, 0, result.offsets.Length);
+                result.offsets = newOffsets;
+            }
+            int[] offsets = result.offsets;
+
+            // If incremental decoding fell in the middle of a
+            // single unicode character, rollback to its start:
+            int upto = offset;
+            while (offsets[upto] == -1)
+                upto--;
+
+            int outUpto = offsets[upto];
+
+            // Pre-allocate for worst case 1-for-1
+            if (outUpto + length >= out_Renamed.Length)
+            {
+                char[] newOut = new char[2 * (outUpto + length)];
+                System.Array.Copy(out_Renamed, 0, newOut, 0, outUpto);
+                result.result = out_Renamed = newOut;
+            }
+
+            while (upto < end)
+            {
+
+                int b = utf8[upto] & 0xff;
+                int ch;
+
+                offsets[upto++] = outUpto;
+
+                if (b < 0xc0)
+                {
+                    System.Diagnostics.Debug.Assert(b < 0x80);
+                    ch = b;
+                }
+                else if (b < 0xe0)
+                {
+                    ch = ((b & 0x1f) << 6) + (utf8[upto] & 0x3f);
+                    offsets[upto++] = -1;
+                }
+                else if (b < 0xf0)
+                {
+                    ch = ((b & 0xf) << 12) + ((utf8[upto] & 0x3f) << 6) + (utf8[upto + 1] & 0x3f);
+                    offsets[upto++] = -1;
+                    offsets[upto++] = -1;
+                }
+                else
+                {
+                    System.Diagnostics.Debug.Assert(b < 0xf8);
+                    ch = ((b & 0x7) << 18) + ((utf8[upto] & 0x3f) << 12) + ((utf8[upto + 1] & 0x3f) << 6) + (utf8[upto + 2] & 0x3f);
+                    offsets[upto++] = -1;
+                    offsets[upto++] = -1;
+                    offsets[upto++] = -1;
+                }
+
+                if (ch <= UNI_MAX_BMP)
+                {
+                    // target is a character <= 0xFFFF
+                    out_Renamed[outUpto++] = (char)ch;
+                }
+                else
+                {
+                    // target is a character in range 0xFFFF - 0x10FFFF
+                    int chHalf = ch - HALF_BASE;
+                    out_Renamed[outUpto++] = (char)((chHalf >> (int)HALF_SHIFT) + UNI_SUR_HIGH_START);
+                    out_Renamed[outUpto++] = (char)((chHalf & HALF_MASK) + UNI_SUR_LOW_START);
+                }
+            }
+
+            offsets[upto] = outUpto;
+            result.length = outUpto;
+        }
+
+        // Only called from assert
+        /*
+        private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
+          try {
+            String s1 = new String(source, offset, length);
+            String s2 = new String(result, 0, upto, "UTF-8");
+            if (!s1.equals(s2)) {
+              //System.out_Renamed.println("DIFF: s1 len=" + s1.Length());
+              //for(int i=0;i<s1.Length();i++)
+              //  System.out_Renamed.println("    " + i + ": " + (int) s1.charAt(i));
+              //System.out_Renamed.println("s2 len=" + s2.Length());
+              //for(int i=0;i<s2.Length();i++)
+              //  System.out_Renamed.println("    " + i + ": " + (int) s2.charAt(i));
+
+              // If the input string was invalid, then the
+              // difference is OK
+              if (!validUTF16String(s1))
+                return true;
+
+              return false;
+            }
+            return s1.equals(s2);
+          } catch (UnsupportedEncodingException uee) {
+            return false;
+          }
+        }
+
+        // Only called from assert
+        private static boolean matches(String source, int offset, int length, byte[] result, int upto) {
+          try {
+            String s1 = source.substring(offset, offset+length);
+            String s2 = new String(result, 0, upto, "UTF-8");
+            if (!s1.equals(s2)) {
+              // Allow a difference if s1 is not valid UTF-16
+
+              //System.out_Renamed.println("DIFF: s1 len=" + s1.Length());
+              //for(int i=0;i<s1.Length();i++)
+              //  System.out_Renamed.println("    " + i + ": " + (int) s1.charAt(i));
+              //System.out_Renamed.println("  s2 len=" + s2.Length());
+              //for(int i=0;i<s2.Length();i++)
+              //  System.out_Renamed.println("    " + i + ": " + (int) s2.charAt(i));
+
+              // If the input string was invalid, then the
+              // difference is OK
+              if (!validUTF16String(s1))
+                return true;
+
+              return false;
+            }
+            return s1.equals(s2);
+          } catch (UnsupportedEncodingException uee) {
+            return false;
+          }
+        }
+
+        public static final boolean validUTF16String(String s) {
+          final int size = s.Length();
+          for(int i=0;i<size;i++) {
+            char ch = s.charAt(i);
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+              if (i < size-1) {
+                i++;
+                char nextCH = s.charAt(i);
+                if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
+                  // Valid surrogate pair
+                } else
+                  // Unmatched hight surrogate
+                  return false;
+              } else
+                // Unmatched hight surrogate
+                return false;
+            } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
+              // Unmatched low surrogate
+              return false;
+          }
+
+          return true;
+        }
+
+        public static final boolean validUTF16String(char[] s, int size) {
+          for(int i=0;i<size;i++) {
+            char ch = s[i];
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+              if (i < size-1) {
+                i++;
+                char nextCH = s[i];
+                if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
+                  // Valid surrogate pair
+                } else
+                  return false;
+              } else
+                return false;
+            } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
+              // Unmatched low surrogate
+              return false;
+          }
+
+          return true;
+        }
+        */
+    }
+}



Mime
View raw message