manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1625910 - in /manifoldcf/trunk: ./ connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/
Date Thu, 18 Sep 2014 08:03:04 GMT
Author: kwright
Date: Thu Sep 18 08:03:03 2014
New Revision: 1625910

URL: http://svn.apache.org/r1625910
Log:
Tentative fix for CONNECTORS-956.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
    manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1625910&r1=1625909&r2=1625910&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Thu Sep 18 08:03:03 2014
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 2.0-dev =====================
 
+CONNECTORS-956: Change how Solr field name escaping is done.
+(Piergiorgio Lucidi, Shinichiro Abe, Edgardo Ambrosi, Karl Wright)
+
 CONNECTORS-1036: Zookeeper service handling also has
 ephemeral nodes which need to be tied to sessions.
 (Karl Wright)

Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java?rev=1625910&r1=1625909&r2=1625910&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java Thu Sep 18 08:03:03 2014
@@ -762,22 +762,10 @@ public class HttpPoster
     return new String[0];
   }
 
-  /** Preprocess field name.
-  * SolrJ has a bug where it does not URL-escape field names.  This causes carnage for
-  * ManifoldCF, because it results in IllegalArgumentExceptions getting thrown deep in SolrJ.
-  * See CONNECTORS-630.
-  * In order to get around this, we need to URL-encode argument names, at least until the underlying
-  * SolrJ issue is fixed.
-  */
-  protected static String preEncode(String fieldName)
-  {
-      return URLEncoder.encode(fieldName);
-  }
-  
   /** Write a field */
   protected static void writeField(ModifiableSolrParams out, String fieldName, String[] fieldValues)
   {
-    out.add(preEncode(fieldName), fieldValues);
+    out.add(fieldName, fieldValues);
   }
   
   /** Write a field */
@@ -794,7 +782,7 @@ public class HttpPoster
   /** Write a field */
   protected static void writeField(ModifiableSolrParams out, String fieldName, String fieldValue)
   {
-    out.add(preEncode(fieldName), fieldValue);
+    out.add(fieldName, fieldValue);
   }
 
   /** Output an acl level */
@@ -1147,7 +1135,7 @@ public class HttpPoster
       Iterator<String> iter = document.getFields();
       while (iter.hasNext())
       {
-        String fieldName = iter.next();
+        String fieldName = makeSafeLuceneField(iter.next());
         applySingleMapping(fieldName, out, fieldName);
       }
     }
@@ -1157,7 +1145,7 @@ public class HttpPoster
       Iterator<String> iter = document.getFields();
       while (iter.hasNext())
       {
-        String fieldName = iter.next();
+        String fieldName = makeSafeLuceneField(iter.next());
         applySingleMapping(fieldName, outputDocument, fieldName);
       }
     }
@@ -1570,5 +1558,36 @@ public class HttpPoster
     }
   }
 
+  /** See CONNECTORS-956.  Make a safe lucene field name from a possibly
+  * unsafe input field name from a repository connector.
+  */
+  protected static String makeSafeLuceneField(String inputField)
+  {
+    StringBuilder sb = new StringBuilder();
+    boolean isFirst = true;
+    for (int i = 0; i < inputField.length(); i++)
+    {
+      char x = inputField.charAt(i);
+      if (isFirst && !Character.isJavaIdentifierStart(x) || !isFirst && !Character.isJavaIdentifierPart(x))
+      {
+        // Check for exceptions for Lucene
+        if (!isFirst && (x == '.' || x == '-'))
+          sb.append(x);
+        else
+          sb.append('_');
+      }
+      else
+      {
+        // Check for exceptions for Lucene
+        if (isFirst && x == '$')
+          sb.append('_');
+        else
+          sb.append(x);
+      }
+      isFirst = false;
+    }
+    return sb.toString();
+  }
+  
 }
 

Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java?rev=1625910&r1=1625909&r2=1625910&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java Thu Sep 18 08:03:03 2014
@@ -26,6 +26,7 @@ import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.net.URLEncoder;
 
 import org.apache.http.Header;
 import org.apache.http.HttpResponse;
@@ -140,7 +141,7 @@ public class ModifiedHttpSolrServer exte
             if( streams != null ) {
               throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "GET can't send streams!" );
             }
-            method = new HttpGet( baseUrl + path + ClientUtils.toQueryString( params, false ) );
+            method = new HttpGet( baseUrl + path + toQueryString( params, false ) );
           }
           else if( SolrRequest.METHOD.POST == request.getMethod() ) {
 
@@ -211,7 +212,7 @@ public class ModifiedHttpSolrServer exte
             }
             // It is has one stream, it is the post body, put the params in the URL
             else {
-              String pstr = ClientUtils.toQueryString(params, false);
+              String pstr = toQueryString(params, false);
               HttpPost post = new HttpPost(url + pstr);
 
               // Single stream as body
@@ -364,4 +365,35 @@ public class ModifiedHttpSolrServer exte
     this.followRedirects = followRedirects;
   }
 
+  public static String toQueryString( SolrParams params, boolean xml ) {
+    StringBuilder sb = new StringBuilder(128);
+    try {
+      String amp = xml ? "&amp;" : "&";
+      boolean first=true;
+      Iterator<String> names = params.getParameterNamesIterator();
+      while( names.hasNext() ) {
+        String key = names.next();
+        String[] valarr = params.getParams( key );
+        if( valarr == null ) {
+          sb.append( first?"?":amp );
+          sb.append( URLEncoder.encode(key, "UTF-8") );
+          first=false;
+        }
+        else {
+          for (String val : valarr) {
+            sb.append( first? "?":amp );
+            sb.append(key);
+            if( val != null ) {
+              sb.append('=');
+              sb.append( URLEncoder.encode( val, "UTF-8" ) );
+            }
+            first=false;
+          }
+        }
+      }
+    }
+    catch (IOException e) {throw new RuntimeException(e);}  // can't happen
+    return sb.toString();
+  }
+  
 }



Mime
View raw message