uima-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomm...@apache.org
Subject svn commit: r998787 - in /uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika: FileSystemCollectionReader.java TIKAWrapper.java
Date Mon, 20 Sep 2010 05:42:35 GMT
Author: tommaso
Date: Mon Sep 20 05:42:35 2010
New Revision: 998787

URL: http://svn.apache.org/viewvc?rev=998787&view=rev
Log:
[UIMA-1878] - applied patch from Greg Holmberg to handle spaces in path string

Modified:
    uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java
    uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java

Modified: uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java?rev=998787&r1=998786&r2=998787&view=diff
==============================================================================
--- uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java (original)
+++ uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java Mon Sep 20 05:42:35 2010
@@ -96,11 +96,11 @@ public class FileSystemCollectionReader 
 
 		// call Tika wrapper 
 		try {
-			tika.populateCASfromURL(aCAS, file.toURL(), this.mMIME, this.mLanguage);
+			tika.populateCASfromURI(aCAS, file.toURI(), this.mMIME, this.mLanguage);
 		} catch (CASException e) {
 			getLogger().log(Level.WARNING,"Problem converting file : "+file.toURL()+"\t"+e.getMessage());
-	    	jcas.setDocumentText(" ");
-	    	return;
+			throw new IOException(e);
+	    	//jcas.setDocumentText(" "); return;
 		}
 	}
 

Modified: uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java?rev=998787&r1=998786&r2=998787&view=diff
==============================================================================
--- uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java (original)
+++ uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java Mon Sep 20 05:42:35 2010
@@ -22,7 +22,7 @@ package org.apache.uima.tika;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.URL;
+import java.net.URI;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
@@ -33,6 +33,7 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.util.Level;
 
 
 public class TIKAWrapper {
@@ -54,16 +55,16 @@ public class TIKAWrapper {
 	}
 	
 	
-	public void populateCASfromURL(CAS cas, URL url, String language) throws CASException{
-		populateCASfromURL(cas, url, null, language);
+	public void populateCASfromURI(CAS cas, URI uri, String language) throws CASException{
+		populateCASfromURI(cas, uri, null, language);
 	}
 	
-	public void populateCASfromURL(CAS cas, URL url, String mime, String language) throws CASException{
+	public void populateCASfromURI(CAS cas, URI uri, String mime, String language) throws CASException{
 	
 		InputStream originalStream=null;
 		try {
-			originalStream = new BufferedInputStream(url
-					.openStream());
+			originalStream = new BufferedInputStream(
+					uri.toURL().openStream());
 		} catch (IOException e1) {
 			new CASException(e1);
 		}
@@ -86,8 +87,8 @@ public class TIKAWrapper {
 	    catch (Exception e){
 	    	// if we have a problem just dump the message and continue
 	    	// getLogger().log(Level.WARNING,"Problem converting file : "+URI+"\t"+e.getMessage());
-	    	cas.setDocumentText("");
-	    	return;
+	    	// cas.setDocumentText(""); return;
+	    	throw new CASException(e);
 	    }
 	    finally {
 			// set language if it was explicitly specified as a configuration
@@ -126,7 +127,7 @@ public class TIKAWrapper {
 	    
 	    FeatureValue fv = new FeatureValue(jcas);
     	fv.setName("uri");
-    	fv.setValue(url.toString());
+    	fv.setValue(uri.toString());
     	docAnnotation.setFeatures(i,fv);
 	    
 	    docAnnotation.addToIndexes();



Mime
View raw message