lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r1080042 - in /lucene/dev/branches/lucene_solr_3_1: ./ lucene/ solr/ solr/CHANGES.txt solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Date Wed, 09 Mar 2011 22:40:45 GMT
Author: uschindler
Date: Wed Mar  9 22:40:45 2011
New Revision: 1080042

URL: http://svn.apache.org/viewvc?rev=1080042&view=rev
Log:
SOLR-2414: All ResponseWriters now use only ServletOutputStreams and wrap their own Writer
around it when serializing. This fixes the bug in PHPSerializedResponseWriter that produced
wrong string length if the servlet container had a broken UTF-8 encoding that was in fact
CESU-8 (see SOLR-1091). The hack was removed by this followup-patch.

Modified:
    lucene/dev/branches/lucene_solr_3_1/   (props changed)
    lucene/dev/branches/lucene_solr_3_1/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_3_1/solr/   (props changed)
    lucene/dev/branches/lucene_solr_3_1/solr/CHANGES.txt
    lucene/dev/branches/lucene_solr_3_1/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java

Modified: lucene/dev/branches/lucene_solr_3_1/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_1/solr/CHANGES.txt?rev=1080042&r1=1080041&r2=1080042&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_1/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_3_1/solr/CHANGES.txt Wed Mar  9 22:40:45 2011
@@ -670,6 +670,14 @@ Other Changes
   hardcoded on text/xml as Content-Type, you have to change them.
   (uschindler, rmuir)
 
+* SOLR-2414: All ResponseWriters now use only ServletOutputStreams
+  and wrap their own Writer around it when serializing. This fixes
+  the bug in PHPSerializedResponseWriter that produced wrong string
+  length if the servlet container had a broken UTF-8 encoding that was
+  in fact CESU-8 (see SOLR-1091). The system property to enable the
+  CESU-8 byte counting in PHPSerializedResponseWriter for broken
+  servlet containers was therefore removed and is now ignored if set.
+  Output is always UTF-8.  (uschindler, yonik, rmuir)
 
 Build
 ----------------------

Modified: lucene/dev/branches/lucene_solr_3_1/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_1/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java?rev=1080042&r1=1080041&r2=1080042&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_1/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java (original)
+++ lucene/dev/branches/lucene_solr_3_1/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java Wed Mar  9 22:40:45 2011
@@ -36,31 +36,15 @@ import org.apache.solr.common.SolrDocume
 /**
  * A description of the PHP serialization format can be found here:
  * http://www.hurring.com/scott/code/perl/serialize/
- *
- * <p>
- * In order to support PHP Serialized strings with a proper byte count, This ResponseWriter
- * must know if the Writers passed to it will result in an output of CESU-8 (UTF-8 w/o support
- * for large code points outside of the BMP)
- * <p>
- * Solr versions before 3.1 assume that all Jetty servlet containers (detected using the "jetty.home"
- * system property) use CESU-8 instead of UTF-8 (verified to the current release of 6.1.26).
- * Solr 3.1 contains a patched version of Jetty that uses real UTF-8 (SOLR-2381)
- * <p>
- * In installations where Solr auto-detects incorrectly, the Solr Administrator should set the
- * "solr.phps.cesu8" system property to either "true" or "false" accordingly.
  */
 public class PHPSerializedResponseWriter implements QueryResponseWriter {
   static String CONTENT_TYPE_PHP_UTF8="text/x-php-serialized;charset=UTF-8";
 
-  // Is this servlet container's UTF-8 encoding actually CESU-8 (i.e. lacks support for
-  // large characters outside the BMP).
-  boolean CESU8 = false;
   public void init(NamedList n) {
-    CESU8 = "true".equals(System.getProperty("solr.phps.cesu8"));
   }
   
   public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
-    PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp, CESU8);
+    PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp);
     try {
       w.writeResponse();
     } finally {
@@ -74,13 +58,11 @@ public class PHPSerializedResponseWriter
 }
 
 class PHPSerializedWriter extends JSONWriter {
-  final private boolean CESU8;
   final UnicodeUtil.UTF8Result utf8;
 
-  public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp, boolean CESU8) {
+  public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) {
     super(writer, req, rsp);
-    this.CESU8 = CESU8;
-    this.utf8 = CESU8 ? null : new UnicodeUtil.UTF8Result();
+    this.utf8 = new UnicodeUtil.UTF8Result();
     // never indent serialized PHP data
     doIndent = false;
   }
@@ -390,23 +372,8 @@ class PHPSerializedWriter extends JSONWr
   public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
     // serialized PHP strings don't need to be escaped at all, however the 
     // string size reported needs be the number of bytes rather than chars.
-    int nBytes;
-    if (CESU8) {
-      nBytes = 0;
-      for (int i=0; i<val.length(); i++) {
-        char ch = val.charAt(i);
-        if (ch<='\u007f') {
-          nBytes += 1;
-        } else if (ch<='\u07ff') {
-          nBytes += 2;
-        } else {
-          nBytes += 3;
-        }
-      }
-    } else {
-      UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
-      nBytes = utf8.length;
-    }
+    UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
+    int nBytes = utf8.length;
 
     writer.write("s:");
     writer.write(Integer.toString(nBytes));



Mime
View raw message