lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Yonik Seeley (JIRA)" <j...@apache.org>
Subject [jira] Commented: (LUCENE-530) Extend NumberTools to support int/long/float/double to string
Date Wed, 22 Mar 2006 18:18:09 GMT
    [ http://issues.apache.org/jira/browse/LUCENE-530?page=comments#action_12371446 ] 

Yonik Seeley commented on LUCENE-530:
-------------------------------------

Here is how Solr did it:
http://svn.apache.org/viewcvs.cgi/incubator/solr/trunk/src/java/org/apache/solr/util/NumberUtils.java?rev=382610&view=markup

It's a binary representation, transformed so that it sorts correctly and fits into chars.
A 4 byte int or float is transformed into 3 java chars
An 8 byte long or double is transformed into 5 java chars

> Extend NumberTools to support int/long/float/double to string
> -------------------------------------------------------------
>
>          Key: LUCENE-530
>          URL: http://issues.apache.org/jira/browse/LUCENE-530
>      Project: Lucene - Java
>         Type: Improvement
>   Components: Analysis
>     Versions: 1.9
>     Reporter: Andy Hind
>     Priority: Minor

>
> Extend Number tools to support int/long/float/double to string 
> So you can search using range queries on int/long/float/double, if you want.
> Here is the basis for how NumberTools could be extended to support int/long/double/float.
> As I only write these values to the index and fix tokenisation in searches, I was not so fussed about the reverse transformations back to Strings.
/**
 * Encodes numeric values as fixed-width, lower-case hexadecimal strings whose
 * {@link String#compareTo(String)} order matches the numeric order of the
 * encoded values.
 * <p>
 * 32-bit values ({@code int}, {@code float}) always produce 8 characters and
 * 64-bit values ({@code long}, {@code double}) always produce 16 characters.
 * Only the forward (number to string) direction is provided.
 * <p>
 * This class holds no mutable state; all methods are static.
 */
public final class NumericEncoder
{
    /*
     * Constants for integer encoding
     */
    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */
    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding (IEEE 754 single precision bit layout)
     */
    static final int FLOAT_SIGN_MASK = 0x80000000;
    static final int FLOAT_EXPONENT_MASK = 0x7F800000;
    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding (IEEE 754 double precision bit layout)
     */
    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;
    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;
    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /** Hex digits indexed by nibble value. */
    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd',
            'e', 'f' };

    /** Selects the low four bits (one hex digit). */
    private static final int MASK = (1 << 4) - 1;

    /** Number of hex digits needed for a 32-bit value. */
    private static final int INT_HEX_LENGTH = 8;

    /** Number of hex digits needed for a 64-bit value. */
    private static final int LONG_HEX_LENGTH = 16;

    /** Not instantiable: static utility class. */
    private NumericEncoder()
    {
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as
     * ffffffff.
     *
     * @param intToEncode the value to encode
     * @return an 8 character lower-case hex string
     */
    public static String encode(int intToEncode)
    {
        // Flipping the sign bit maps the signed range onto the unsigned range
        // in the same order.
        return encodeToHex(intToEncode ^ INTEGER_SIGN_MASK);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as
     * ffffffffffffffff.
     *
     * @param longToEncode the value to encode
     * @return a 16 character lower-case hex string
     */
    public static String encode(long longToEncode)
    {
        return encodeToHex(longToEncode ^ LONG_SIGN_MASK);
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison.
     * <p>
     * The bits come from {@link Float#floatToIntBits(float)}, which collapses
     * every NaN to one canonical value, so NaN always encodes as ffc00000 and
     * sorts after positive infinity. Negative zero (7fffffff) and positive
     * zero (80000000) encode differently, with -0 immediately before +0.
     *
     * @param floatToEncode the value to encode
     * @return an 8 character lower-case hex string
     */
    public static String encode(float floatToEncode)
    {
        int bits = Float.floatToIntBits(floatToEncode);
        // Positive values only need the sign bit set so they sort above the
        // negatives. Negative values have every bit inverted: that clears the
        // sign bit and reverses the magnitude order.
        int flip = FLOAT_SIGN_MASK;
        if ((bits & FLOAT_SIGN_MASK) != 0)
        {
            flip |= FLOAT_EXPONENT_MASK | FLOAT_MANTISSA_MASK;
        }
        return encodeToHex(bits ^ flip);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison.
     * <p>
     * The bits come from {@link Double#doubleToLongBits(double)}, which
     * collapses every NaN to one canonical value, so NaN always encodes as
     * fff8000000000000 and sorts after positive infinity. Negative zero and
     * positive zero encode differently, with -0 immediately before +0.
     *
     * @param doubleToEncode the value to encode
     * @return a 16 character lower-case hex string
     */
    public static String encode(double doubleToEncode)
    {
        long bits = Double.doubleToLongBits(doubleToEncode);
        // Same scheme as for float: set the sign bit for positives, invert
        // all bits for negatives.
        long flip = DOUBLE_SIGN_MASK;
        if ((bits & DOUBLE_SIGN_MASK) != 0)
        {
            flip |= DOUBLE_EXPONENT_MASK | DOUBLE_MANTISSA_MASK;
        }
        return encodeToHex(bits ^ flip);
    }

    /**
     * Formats the 32 bits of {@code i} as exactly 8 zero-padded hex digits,
     * treating the value as unsigned.
     */
    private static String encodeToHex(int i)
    {
        char[] buf = new char[INT_HEX_LENGTH];
        // Fill from the least significant digit backwards.
        for (int pos = buf.length - 1; pos >= 0; pos--)
        {
            buf[pos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        return new String(buf);
    }

    /**
     * Formats the 64 bits of {@code l} as exactly 16 zero-padded hex digits,
     * treating the value as unsigned.
     */
    private static String encodeToHex(long l)
    {
        char[] buf = new char[LONG_HEX_LENGTH];
        for (int pos = buf.length - 1; pos >= 0; pos--)
        {
            buf[pos] = DIGITS[(int) (l & MASK)];
            l >>>= 4;
        }
        return new String(buf);
    }
}
> public class NumericEncodingTest extends TestCase
> {
>     public NumericEncodingTest()
>     {
>         super();
>     }
>     public NumericEncodingTest(String arg0)
>     {
>         super(arg0);
>     }
>     /**
>      * Do an exhaustive test for integers
>      * 
>      */
>     public void xtestAllIntegerEncodings()
>     {
>         String lastString = null;
>         String nextString = null;
>         for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++)
>         {
>             nextString = NumericEncoder.encode((int) i);
>             if (lastString != null)
>             {
>                 assertFalse(lastString.compareTo(nextString) > 0);
>             }
>             lastString = nextString;
>         }
>     }
>     /**
>      * Do an exhaustive test for float
>      * 
>      */
>     public void xtestAllFloatEncodings()
>     {
>         Float last = null;
>         Float next = null;
>         String lastString = null;
>         String nextString = null;
>         for (int sign = 1; sign >= 0; sign--)
>         {
>             if (sign == 0)
>             {
>                 for (int exponent = 0; exponent <= 0xFF; exponent++)
>                 {
>                     for (int mantissa = 0; mantissa <= 0x007FFFFF; mantissa++)
>                     {
>                         int bitPattern = sign << 31 | exponent << 23 | mantissa;
>                         next = Float.intBitsToFloat(bitPattern);
>                         if (!next.equals(Float.NaN) && (last != null) &&
(last.compareTo(next) > 0))
>                         {
>                             System.err.println(last + " > " + next);
>                         }
>                         if (!next.equals(Float.NaN))
>                         {
>                             nextString = NumericEncoder.encode(next);
>                             if ((lastString != null) && (lastString.compareTo(nextString)
> 0))
>                             {
>                                 System.err.println(lastString + " > " + nextString);
>                             }
>                             lastString = nextString;
>                         }
>                         last = next;
>                     }
>                 }
>             }
>             else
>             {
>                 for (int exponent = 0xFF; exponent >= 0; exponent--)
>                 {
>                     for (int mantissa = 0x007FFFFF; mantissa >= 0; mantissa--)
>                     {
>                         int bitPattern = sign << 31 | exponent << 23 | mantissa;
>                         next = Float.intBitsToFloat(bitPattern);
>                         if (!next.equals(Float.NaN) && (last != null) &&
(last.compareTo(next) > 0))
>                         {
>                             System.err.println(last + " > " + next);
>                         }
>                         if (!next.equals(Float.NaN))
>                         {
>                             nextString = NumericEncoder.encode(next);
>                             if ((lastString != null) && (lastString.compareTo(nextString)
> 0))
>                             {
>                                 System.err.println(lastString + " > " + nextString);
>                             }
>                             lastString = nextString;
>                         }
>                         last = next;
>                     }
>                 }
>             }
>         }
>     }
>     /*
>      * Sample test for int
>      */
>     public void testIntegerEncoding()
>     {
>         assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE));
>         assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1));
>         assertEquals("7fffffff", NumericEncoder.encode(-1));
>         assertEquals("80000000", NumericEncoder.encode(0));
>         assertEquals("80000001", NumericEncoder.encode(1));
>         assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1));
>         assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE));
>     }
>     /*
>      * Sample test for long
>      */
>     public void testLongEncoding()
>     {
>         assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE));
>         assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1));
>         assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L));
>         assertEquals("8000000000000000", NumericEncoder.encode(0L));
>         assertEquals("8000000000000001", NumericEncoder.encode(1L));
>         assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1));
>         assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE));    
 
>     }
>     /*
>      * Sample test for float
>      */
>     public void testFloatEncoding()
>     {
>         assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY));
>         assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE));
>         assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE));
>         assertEquals("7fffffff", NumericEncoder.encode(-0f));
>         assertEquals("80000000", NumericEncoder.encode(0f));
>         assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE));
>         assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE));
>         assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY));
>         assertEquals("ffc00000", NumericEncoder.encode(Float.NaN));
>     }
>     /*
>      * Sample test for double
>      */
>     public void testDoubleEncoding()
>     {
>         assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY));
>         assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE));
>         assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE));
>         assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d));
>         assertEquals("8000000000000000", NumericEncoder.encode(0d));
>         assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE));
>         assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE));
>         assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY));
>         assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN));
>     }
> }

-- 
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
   http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
   http://www.atlassian.com/software/jira


---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org


Mime
View raw message