lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r756760 - in /lucene/java/trunk/src: java/org/apache/lucene/document/CompressionTools.java java/org/apache/lucene/document/Field.java test/org/apache/lucene/document/TestBinaryDocument.java
Date Fri, 20 Mar 2009 21:10:12 GMT
Author: mikemccand
Date: Fri Mar 20 21:10:12 2009
New Revision: 756760

URL: http://svn.apache.org/viewvc?rev=756760&view=rev
Log:
LUCENE-652: add compress/decompressString, too

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/document/CompressionTools.java
    lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
    lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/CompressionTools.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/CompressionTools.java?rev=756760&r1=756759&r2=756760&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/CompressionTools.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/CompressionTools.java Fri Mar 20 21:10:12 2009
@@ -21,21 +21,14 @@
 import java.util.zip.Inflater;
 import java.util.zip.DataFormatException;
 import java.io.ByteArrayOutputStream;
+import org.apache.lucene.util.UnicodeUtil;
 
 /** Simple utility class providing static methods to
  *  compress and decompress binary data for stored fields.
  *  This class uses java.util.zip.Deflater and Inflater
- *  classes to compress and decompress.
- *
- *  To compress a String:
- *  <pre>
- *    String string = ...
- *    byte[] bytes = compress(string.getBytes("UTF-8");
- *  </pre>
- *  and  to decompress:
- *  <pre>
- *    new String(decompress(bytes), "UTF-8");
- *  </pre>
+ *  classes to compress and decompress, which is the same
+ *  format previously used by the now deprecated
+ *  Field.Store.COMPRESS.
  */
 
 public class CompressionTools {
@@ -84,6 +77,20 @@
     return compress(value, 0, value.length, Deflater.BEST_COMPRESSION);
   }
 
+  /** Compresses the String value, with default BEST_COMPRESSION level */
+  public static byte[] compressString(String value) {
+    return compressString(value, Deflater.BEST_COMPRESSION);
+  }
+
+  /** Compresses the String value using the specified
+   *  compressionLevel (constants are defined in
+   *  java.util.zip.Deflater). */
+  public static byte[] compressString(String value, int compressionLevel) {
+    UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
+    UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
+    return compress(result.result, 0, result.length, compressionLevel);
+  }
+
   /** Decompress the byte array previously returned by
    *  compress */
   public static byte[] decompress(byte[] value) throws DataFormatException {
@@ -107,4 +114,13 @@
     
     return bos.toByteArray();
   }
+
+  /** Decompress the byte array previously returned by
+   *  compressString back into a String */
+  public static String decompressString(byte[] value) throws DataFormatException {
+    UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
+    final byte[] bytes = decompress(value);
+    UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
+    return new String(result.result, 0, result.length);
+  }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Field.java?rev=756760&r1=756759&r2=756760&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/Field.java Fri Mar 20 21:10:12 2009
@@ -43,7 +43,11 @@
 
     /** Store the original field value in the index in a compressed form. This is
      * useful for long documents and for binary valued fields.
-     * @deprecated Please use {@link CompressionTools} instead
+     * @deprecated Please use {@link CompressionTools} instead.
+     * For string fields that were previously indexed and stored using compression,
+     * the new way to achieve this is: First add the field indexed-only (no store)
+     * and additionally using the same field name as a binary, stored field
+     * with {@link CompressionTools#compressString}.
      */
     public static final Store COMPRESS = new Store("COMPRESS");
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java?rev=756760&r1=756759&r2=756760&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java Fri Mar 20 21:10:12 2009
@@ -103,10 +103,12 @@
     throws Exception
   {
     Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()), Field.Store.YES);
+    Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.compressString(binaryValCompressed), Field.Store.YES);
     
     Document doc = new Document();
     
     doc.add(binaryFldCompressed);
+    doc.add(stringFldCompressed);
     
     /** add the doc to a ram index */
     MockRAMDirectory dir = new MockRAMDirectory();
@@ -122,7 +124,8 @@
     /** fetch the binary compressed field and compare it's content with the original one */
     String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
     assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
-    
+    assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));
+
     reader.close();
     dir.close();
   }



Mime
View raw message