manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1593193 - /manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
Date Thu, 08 May 2014 07:28:00 GMT
Author: kwright
Date: Thu May  8 07:28:00 2014
New Revision: 1593193

URL: http://svn.apache.org/r1593193
Log:
Revise so that (a) it builds and (b) packing and unpacking are properly abstracted

Modified:
    manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java

Modified: manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java?rev=1593193&r1=1593192&r2=1593193&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java Thu May  8 07:28:00 2014
@@ -27,6 +27,8 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Locale;
+import java.util.Set;
+import java.util.HashSet;
 
 import org.apache.commons.io.FilenameUtils;
 import org.apache.http.Consts;
@@ -282,89 +284,8 @@ public class AmazonCloudSearchConnector 
   public String getOutputDescription(OutputSpecification os)
     throws ManifoldCFException, ServiceInterruption
   {
-    // Do the source/target pairs
-    int i = 0;
-    Map<String, List<String>> sourceTargets = new HashMap<String, List<String>>();
-    boolean keepAllMetadata = true;
-    while (i < os.getChildCount()) {
-      SpecificationNode sn = os.getChild(i++);
-      
-      if(sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA)) {
-        String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
-        keepAllMetadata = Boolean.parseBoolean(value);
-      } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
-        String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
-        String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
-        
-        if (target == null) {
-          target = "";
-        }
-        List<String> list = (List<String>)sourceTargets.get(source);
-        if (list == null) {
-          list = new ArrayList<String>();
-          sourceTargets.put(source, list);
-        }
-        list.add(target);
-      }
-    }
-    
-    String[] sortArray = new String[sourceTargets.size()];
-    Iterator iter = sourceTargets.keySet().iterator();
-    i = 0;
-    while (iter.hasNext()) {
-      sortArray[i++] = (String)iter.next();
-    }
-    java.util.Arrays.sort(sortArray);
-    
-    ArrayList<String[]> sourceTargetsList = new ArrayList<String[]>();
-    i = 0;
-    while (i < sortArray.length) {
-      String source = sortArray[i++];
-      List<String> values = (List<String>)sourceTargets.get(source);
-      java.util.Collections.sort(values);
-      int j = 0;
-      while (j < values.size()) {
-        String target = (String)values.get(j++);
-        String[] fixedList = new String[2];
-        fixedList[0] = source;
-        fixedList[1] = target;
-        sourceTargetsList.add(fixedList);
-      }
-    }
-    
-    //for Content tab
-    AmazonCloudSearchSpecs spec = new AmazonCloudSearchSpecs(getSpecNode(os));
-    
-    //build json 
-    // {"fieldMappings":{"Content-Style-Type":"content_type","dc:title":"title","description":"description"},"keepAllMetadata":true}
-    String resultBody = "";
-    try {
-      ObjectMapper mapper = new ObjectMapper();
-      
-      ObjectNode rootNode = mapper.createObjectNode();
-      ObjectNode fm = rootNode.putObject("fieldMappings");
-      for(String[] f : sourceTargetsList){
-        fm.put(f[0], f[1]);
-      }
-      
-      // Keep all metadata flag
-      if (keepAllMetadata)
-      {
-        rootNode.put("keepAllMetadata", true);
-      }
-      else
-      {
-        rootNode.put("keepAllMetadata", false);
-      }
-      
-      // Content tab..
-      rootNode.put("content", mapper.valueToTree(spec));
-      
-      resultBody = mapper.writeValueAsString(rootNode);
-    } catch (JsonProcessingException e) {
-      throw new ManifoldCFException(e);
-    }
-    return resultBody;
+    SpecPacker sp = new SpecPacker(os);
+    return sp.toPackedString();
   }
 
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
@@ -376,58 +297,31 @@ public class AmazonCloudSearchConnector 
   public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
     throws ManifoldCFException, ServiceInterruption
   {
-    try {
-      ObjectMapper mapper = new ObjectMapper();
-      AmazonCloudSearchSpecs spec = new AmazonCloudSearchSpecs(mapper.readTree(outputDescription).get("content"));
-      if(spec.checkMimeType(mimeType))
-      {
-        return super.checkMimeTypeIndexable(mimeType);
-      }else
-      {
-        return false;
-      }
-    } catch (JsonProcessingException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e) {
-      throw new ManifoldCFException(e);
-    }
+    SpecPacker sp = new SpecPacker(outputDescription);
+    if (sp.checkMimeType(mimeType))
+      return super.checkMimeTypeIndexable(outputDescription, mimeType);
+    else
+      return false;
   }
 
   @Override
   public boolean checkLengthIndexable(String outputDescription, long length)
-      throws ManifoldCFException, ServiceInterruption {
-    try {
-      ObjectMapper mapper = new ObjectMapper();
-      AmazonCloudSearchSpecs spec = new AmazonCloudSearchSpecs(mapper.readTree(outputDescription).get("content"));
-      long maxFileSize = spec.getMaxFileSize();
-      if (length > maxFileSize)
-      {
-        return false;
-      }
+    throws ManifoldCFException, ServiceInterruption {
+    SpecPacker sp = new SpecPacker(outputDescription);
+    if (sp.checkLengthIndexable(length))
       return super.checkLengthIndexable(outputDescription, length);
-    } catch (JsonProcessingException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e){
-      throw new ManifoldCFException(e);
-    }
+    else
+      return false;
   }
 
   @Override
   public boolean checkURLIndexable(String outputDescription, String url)
     throws ManifoldCFException, ServiceInterruption {
-    try {
-      ObjectMapper mapper = new ObjectMapper();
-      AmazonCloudSearchSpecs spec = new AmazonCloudSearchSpecs(mapper.readTree(outputDescription).get("content"));
-      if (spec.checkExtension(FilenameUtils.getExtension(url)))
-      {
-        return super.checkURLIndexable(outputDescription, url);
-      }
+    SpecPacker sp = new SpecPacker(outputDescription);
+    if (sp.checkURLIndexable(url))
+      return super.checkURLIndexable(outputDescription, url);
+    else
       return false;
-    } catch (JsonProcessingException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e){
-      throw new ManifoldCFException(e);
-    }
   }
   
   /** Add (or replace) a document in the output data store using the connector.
@@ -830,7 +724,6 @@ public class AmazonCloudSearchConnector 
   {
     // Prep for field mappings
     List<Map<String,String>> fieldMappings = new ArrayList<Map<String,String>>();
-    int i = 0;
     String keepAllMetadataValue = "true";
     for (int i = 0; i < os.getChildCount(); i++)
     {
@@ -838,6 +731,7 @@ public class AmazonCloudSearchConnector 
       if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
         String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
         String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
+        String targetDisplay;
         if (target == null)
         {
           target = "";
@@ -868,12 +762,12 @@ public class AmazonCloudSearchConnector 
     for (int i = 0; i < os.getChildCount(); i++)
     {
       SpecificationNode sn = os.getChild(i);
-      if (sn.getType.equals(AmazonCloudSearchConfig.NODE_MAXLENGTH))
-        maxFileSize = sn.getAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+      if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH))
+        maxFileSize = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
       else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES))
-        allowedMimeTypes = sn.getValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+        allowedMimeTypes = sn.getValue();
       else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS))
-        allowedFileExtensions = sn.getValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+        allowedFileExtensions = sn.getValue();
     }
     paramMap.put("MAXFILESIZE",maxFileSize);
     paramMap.put("MIMETYPES",allowedMimeTypes);
@@ -955,7 +849,7 @@ public class AmazonCloudSearchConnector 
           i++;
       }
       SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_MAXLENGTH);
-      sn.setAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE,x);
+      sn.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE,x);
       os.addChild(os.getChildCount(),sn);
     }
 
@@ -1060,19 +954,6 @@ public class AmazonCloudSearchConnector 
   }
   
 
-  final private SpecificationNode getSpecNode(OutputSpecification os) {
-    int l = os.getChildCount();
-    for (int i = 0; i < l; i++)
-    {
-      SpecificationNode node = os.getChild(i);
-      if (AmazonCloudSearchSpecs.AMAZONCLOUDSEARCH_SPECS_NODE.equals(node.getType()))
-      {
-        return node;
-      }
-    }
-    return null;
-  }
-  
   /** View specification.
  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
@@ -1092,5 +973,85 @@ public class AmazonCloudSearchConnector 
     Messages.outputResourceWithVelocity(out,locale,VIEW_SPECIFICATION_HTML,paramMap);
     
   }
+  
+  protected static class SpecPacker {
+    
+    private final Map<String,String> sourceTargets = new HashMap<String,String>();
+    private final boolean keepAllMetadata;
+    private final Set<String> extensions = new HashSet<String>();
+    private final Set<String> mimeTypes = new HashSet<String>();
+    private final Long lengthCutoff;
+    
+    public SpecPacker(OutputSpecification os) {
+      boolean keepAllMetadata = true;
+      for (int i = 0; i < os.getChildCount(); i++) {
+        SpecificationNode sn = os.getChild(i);
+        
+        if(sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA)) {
+          String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+          keepAllMetadata = Boolean.parseBoolean(value);
+        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
+          String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
+          String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
+          
+          if (target == null) {
+            target = "";
+          }
+          sourceTargets.put(source, target);
+        }
+      }
+      this.keepAllMetadata = keepAllMetadata;
+      // MHL for mimetypes and extensions and length
+      this.lengthCutoff = null;
+    }
+    
+    public SpecPacker(String packedString) {
+      // MHL
+      this.keepAllMetadata = true;
+      this.lengthCutoff = null;
+    }
+    
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+      
+      String[] sortArray = new String[sourceTargets.size()];
+      int i = 0;
+      for (String source : sourceTargets.keySet()) {
+        sortArray[i++] = source;
+      }
+      java.util.Arrays.sort(sortArray);
+      
+      List<String> packedMappings = new ArrayList<String>();
+      String[] fixedList = new String[2];
+      for (String source : sortArray) {
+        String target = sourceTargets.get(source);
+        StringBuilder localBuffer = new StringBuilder();
+        fixedList[0] = source;
+        fixedList[1] = target;
+        packFixedList(localBuffer,fixedList,':');
+        packedMappings.add(localBuffer.toString());
+      }
+      packList(sb,packedMappings,'+');
+
+      // MHL for mimetypes and all metadata and extensions and length
+      return sb.toString();
+    }
+    
+    public boolean checkLengthIndexable(long length) {
+      if (lengthCutoff == null)
+        return true;
+      return (length <= lengthCutoff.longValue());
+    }
+    
+    public boolean checkMimeType(String mimeType) {
+      return mimeTypes.contains(mimeType);
+    }
+    
+    public boolean checkURLIndexable(String url) {
+      String extension = FilenameUtils.getExtension(url);
+      return extensions.contains(extension);
+    }
+    
+  }
   
 }



Mime
View raw message