Return-Path: Delivered-To: apmail-incubator-tika-commits-archive@locus.apache.org Received: (qmail 98032 invoked from network); 25 Sep 2007 11:49:12 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 25 Sep 2007 11:49:12 -0000 Received: (qmail 58123 invoked by uid 500); 25 Sep 2007 11:49:02 -0000 Delivered-To: apmail-incubator-tika-commits-archive@incubator.apache.org Received: (qmail 58094 invoked by uid 500); 25 Sep 2007 11:49:02 -0000 Mailing-List: contact tika-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: tika-dev@incubator.apache.org Delivered-To: mailing list tika-commits@incubator.apache.org Received: (qmail 58085 invoked by uid 99); 25 Sep 2007 11:49:02 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 25 Sep 2007 04:49:02 -0700 X-ASF-Spam-Status: No, hits=-100.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 25 Sep 2007 11:49:05 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id D76C71A9832; Tue, 25 Sep 2007 04:48:44 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r579208 - in /incubator/tika/trunk: ./ src/main/java/org/apache/tika/config/ src/main/java/org/apache/tika/parser/ src/main/java/org/apache/tika/parser/html/ src/main/java/org/apache/tika/parser/msexcel/ src/main/java/org/apache/tika/parser... Date: Tue, 25 Sep 2007 11:48:42 -0000 To: tika-commits@incubator.apache.org From: jukka@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20070925114844.D76C71A9832@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jukka Date: Tue Sep 25 04:48:38 2007 New Revision: 579208 URL: http://svn.apache.org/viewvc?rev=579208&view=rev Log: TIKA-26 - Use Map instead of List Modified: incubator/tika/trunk/CHANGES.txt incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java incubator/tika/trunk/src/main/java/org/apache/tika/utils/MSExtractor.java incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java Modified: incubator/tika/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/CHANGES.txt (original) +++ incubator/tika/trunk/CHANGES.txt Tue Sep 25 04:48:38 2007 @@ -44,3 +44,5 @@ 20. TIKA-30 - Added utility constructors to TikaConfig (K. Bennett & jukka) 21. TIKA-28 - Rename config.xml to tika-config.xml or similar (mattmann) + +22. TIKA-26 - Use Map instead of List (jukka) Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java Tue Sep 25 04:48:38 2007 @@ -24,8 +24,6 @@ */ public class Content { - private final String name; - private final String textSelect; private final String xPathSelect; @@ -37,14 +35,9 @@ private String[] values; public Content(Element element) { - name = element.getAttributeValue("name"); xPathSelect = element.getAttributeValue("xpathSelect"); textSelect = element.getAttributeValue("textSelect"); regexSelect = element.getChildTextTrim("regexSelect"); - } - - public String getName() { - return name; } public String getRegexSelect() { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java Tue Sep 25 04:48:38 2007 @@ -16,9 +16,9 @@ */ package org.apache.tika.config; -import java.util.ArrayList; import java.util.Collections; -import java.util.List; +import java.util.HashMap; +import java.util.Map; import org.jdom.Element; @@ -33,8 +33,8 @@ private final String nameSpace; - private final List contents = new ArrayList(); -; + private final Map contents = + new HashMap(); public ParserConfig(Element element) { name = element.getAttributeValue("name"); @@ -43,7 +43,8 @@ Element extract = element.getChild("extract"); if (extract != null) { for (Object child : extract.getChildren()) { - contents.add(new Content((Element) child)); + String name = ((Element) child).getAttributeValue("name"); + contents.put(name, new Content((Element) child)); } } } @@ -60,8 +61,8 @@ return parserClass; } - public List getContents() { - return Collections.unmodifiableList(contents); + public Map getContents() { + return Collections.unmodifiableMap(contents); } } Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java Tue Sep 25 04:48:38 2007 @@ -17,7 +17,7 @@ package org.apache.tika.parser; import java.io.InputStream; -import java.util.List; +import java.util.Map; import org.apache.tika.config.Content; @@ -32,7 +32,7 @@ private String namespace; - private List contents; + private Map contents; protected String contentStr; @@ -80,12 +80,7 @@ * fulltext */ public Content getContent(String name) { - for (Content content : getContents()) { - if (name.equals(content.getName())) { - return content; - } - } - return null; + return getContents().get(name); } /** @@ -93,11 +88,11 @@ * TikaConfig Xml file. It could be a document metadata, XPath selection, * regex selection or fulltext */ - public List getContents() { + public Map getContents() { return contents; } - public void setContents(List contents) { + public void setContents(Map contents) { this.contents = contents; } Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java Tue Sep 25 04:48:38 2007 @@ -17,9 +17,9 @@ package org.apache.tika.parser.html; import java.io.InputStream; -import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.log4j.Logger; import org.apache.oro.text.regex.MalformedPatternException; @@ -42,19 +42,15 @@ private Node root = null; - public List getContents() { + public Map getContents() { if (contentStr == null) { if (root == null) root = getRoot(getInputStream()); contentStr = getTextContent(root); } - List ctt = super.getContents(); + Map ctt = super.getContents(); - if (ctt == null) { - return new ArrayList(0); - } - - Iterator i = ctt.iterator(); + Iterator i = ctt.values().iterator(); while (i.hasNext()) { Content ct = (Content) i.next(); if (ct.getTextSelect() != null) { @@ -94,7 +90,7 @@ private void extractElementTxt(Element root, Content content) { - NodeList children = root.getElementsByTagName(content.getName()); + NodeList children = root.getElementsByTagName(content.getTextSelect()); if (children != null) { if (children.getLength() > 0) { if (children.getLength() == 1) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java Tue Sep 25 04:48:38 2007 @@ -18,6 +18,7 @@ import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.tika.config.Content; import org.apache.tika.parser.Parser; @@ -37,7 +38,7 @@ static Logger logger = Logger.getRootLogger(); - public List getContents() { + public Map getContents() { if (contentStr == null) { // extrator.setContents(getParserConfig().getContents()); try { @@ -47,8 +48,8 @@ e.printStackTrace(); } } - List ctt = super.getContents(); - Iterator i = ctt.iterator(); + Map ctt = super.getContents(); + Iterator i = ctt.values().iterator(); while (i.hasNext()) { Content ct = (Content) i.next(); if (ct.getTextSelect() != null) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java Tue Sep 25 04:48:38 2007 @@ -39,7 +39,7 @@ static Logger logger = Logger.getRootLogger(); - public List getContents() { + public Map getContents() { if (contentStr == null) { extrator.setContents(super.getContents()); try { @@ -49,8 +49,8 @@ e.printStackTrace(); } } - List ctt = super.getContents(); - Iterator i = ctt.iterator(); + Map ctt = super.getContents(); + Iterator i = ctt.values().iterator(); while (i.hasNext()) { Content ct = (Content) i.next(); if (ct.getTextSelect() != null) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java Tue Sep 25 04:48:38 2007 @@ -16,7 +16,6 @@ */ package org.apache.tika.parser.msword; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -40,7 +39,7 @@ static Logger logger = Logger.getRootLogger(); - public List getContents() { + public Map getContents() { if (contentStr == null) { // extractor try { @@ -50,8 +49,8 @@ e.printStackTrace(); } } - List ctt = super.getContents(); - Iterator i = ctt.iterator(); + Map ctt = super.getContents(); + Iterator i = ctt.values().iterator(); while (i.hasNext()) { Content ct = (Content) i.next(); if (ct.getTextSelect() != null) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java Tue Sep 25 04:48:38 2007 @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; @@ -82,15 +83,15 @@ return xmlDoc; } - public List getContents() { + public Map getContents() { if (xmlDoc == null) xmlDoc = parse(getInputStream()); if (contentStr == null) { contentStr = xp.concatOccurance(xmlDoc, "//*", " "); } List documentNs = xp.getAllDocumentNs(xmlDoc); - List ctt = super.getContents(); - Iterator it = ctt.iterator(); + Map ctt = super.getContents(); + Iterator it = ctt.values().iterator(); while (it.hasNext()) { Content content = (Content) it.next(); if (content.getXPathSelect() != null) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Tue Sep 25 04:48:38 2007 @@ -20,6 +20,7 @@ import java.io.StringWriter; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.tika.config.Content; import org.apache.tika.parser.Parser; @@ -43,7 +44,7 @@ private PDDocument pdfDocument = null; - public List getContents() { + public Map getContents() { // String contents = getContent(); if (contentStr == null) { try { @@ -72,8 +73,8 @@ } } } - List ctt = super.getContents(); - Iterator i = ctt.iterator(); + Map ctt = super.getContents(); + Iterator i = ctt.values().iterator(); while (i.hasNext()) { Content ct = (Content) i.next(); Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java Tue Sep 25 04:48:38 2007 @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Iterator; import java.util.List; +import java.util.Map; import javax.swing.text.BadLocationException; import javax.swing.text.DefaultStyledDocument; @@ -40,7 +41,7 @@ static Logger logger = Logger.getRootLogger(); - public List getContents() { + public Map getContents() { if (contentStr == null) { try { DefaultStyledDocument sd = new DefaultStyledDocument(); @@ -53,8 +54,8 @@ logger.error(j.getMessage()); } } - List ctt = super.getContents(); - Iterator i = ctt.iterator(); + Map ctt = super.getContents(); + Iterator i = ctt.values().iterator(); while (i.hasNext()) { Content ct = (Content) i.next(); if (ct.getTextSelect() != null) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java Tue Sep 25 04:48:38 2007 @@ -20,7 +20,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -41,7 +40,7 @@ static Logger logger = Logger.getRootLogger(); - public List getContents() { + public Map getContents() { if (contentStr == null) { StringBuffer sb = new StringBuffer(); try { @@ -65,8 +64,8 @@ } contentStr = sb.toString(); } - List ctt = super.getContents(); - Iterator i = ctt.iterator(); + Map ctt = super.getContents(); + Iterator i = ctt.values().iterator(); while (i.hasNext()) { Content ct = (Content) i.next(); if (ct.getTextSelect() != null) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java Tue Sep 25 04:48:38 2007 @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.tika.config.Content; import org.apache.tika.parser.Parser; @@ -52,7 +53,7 @@ private SimpleNamespaceContext nsc = new SimpleNamespaceContext(); - public List getContents() { + public Map getContents() { if (contentStr == null) { if (xmlDoc == null) xmlDoc = Utils.parse(getInputStream()); @@ -61,8 +62,8 @@ if (xmlDoc == null) xmlDoc = Utils.parse(getInputStream()); List documentNs = getAllDocumentNs(xmlDoc); - List ctt = super.getContents(); - Iterator it = ctt.iterator(); + Map ctt = super.getContents(); + Iterator it = ctt.values().iterator(); if (exist(documentNs, getNamespace())) { while (it.hasNext()) { Content content = (Content) it.next(); Modified: incubator/tika/trunk/src/main/java/org/apache/tika/utils/MSExtractor.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/MSExtractor.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/utils/MSExtractor.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/MSExtractor.java Tue Sep 25 04:48:38 2007 @@ -18,9 +18,7 @@ // JDK imports import java.io.InputStream; -import java.util.Date; -import java.util.List; -import java.util.Properties; +import java.util.Map; import org.apache.tika.config.Content; // Jakarta POI imports @@ -44,13 +42,13 @@ private POIFSReader reader = null; - private List contents; + private Map contents; /** Constructs a new Microsoft document extractor. */ public MSExtractor() { } - public void setContents(List contents){ + public void setContents(Map contents){ this.contents = contents; } @@ -61,7 +59,7 @@ // First, extract properties this.reader = new POIFSReader(); - this.reader.registerListener(new PropertiesReaderListener(contents), + this.reader.registerListener(new PropertiesReaderListener(), SummaryInformation.DEFAULT_STREAM_NAME); //input.reset(); if (input.available() > 0) { @@ -86,11 +84,6 @@ } private class PropertiesReaderListener implements POIFSReaderListener { - private List contents; - - PropertiesReaderListener(List contents) { - this.contents = contents; - } public void processPOIFSReaderEvent(POIFSReaderEvent event) { if (!event.getName().startsWith( @@ -101,9 +94,7 @@ try { SummaryInformation si = (SummaryInformation) PropertySetFactory .create(event.getStream()); - - for (int i = 0; i < contents.size(); i++) { - Content content = contents.get(i); + for (Content content : contents.values()) { if (content.getTextSelect().equalsIgnoreCase("title")) { content.setValue(si.getTitle()); } @@ -137,25 +128,6 @@ else if (content.getTextSelect().equalsIgnoreCase("")) { //content.setValue(si.getCharCount()); } - else if (content.getTextSelect().equals("")) { - - } - else if (content.getTextSelect().equals("")) { - - } - else if (content.getTextSelect().equals("")) { - - } - else if (content.getTextSelect().equals("")) { - - } - else if (content.getTextSelect().equals("")) { - - } - else if (content.getTextSelect().equals("")) { - - } - System.out.println(content.getName()+" :"+content.getValue()); } } catch (Exception ex) { Modified: incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java?rev=579208&r1=579207&r2=579208&view=diff ============================================================================== --- incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java (original) +++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java Tue Sep 25 04:48:38 2007 @@ -32,6 +32,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; @@ -53,26 +54,25 @@ static Logger logger = Logger.getRootLogger(); - public static String toString(Collection structuredContent) { + public static String toString(Map structuredContent) { final StringWriter sw = new StringWriter(); print(structuredContent,sw); return sw.toString(); } - public static void print(Collection structuredContent) { + public static void print(Map structuredContent) { print(structuredContent,new OutputStreamWriter(System.out)); } - public static void print(Collection structuredContent,Writer outputWriter) { + public static void print(Map structuredContent,Writer outputWriter) { final PrintWriter output = new PrintWriter(outputWriter,true); - for (Iterator iter = structuredContent.iterator(); iter - .hasNext();) { - Content ct = iter.next(); + for (Map.Entry entry : structuredContent.entrySet()) { + Content ct = entry.getValue(); if (ct.getValue() != null) { - output.print(ct.getName() + ": "); + output.print(entry.getKey() + ": "); output.println(ct.getValue()); } else if (ct.getValues() != null) { - output.print(ct.getName() + ": "); + output.print(entry.getKey() + ": "); for (int j = 0; j < ct.getValues().length; j++) { if (j == 0) output.println(ct.getValues()[j]); @@ -82,7 +82,7 @@ } } else { // there are no values, but there is a Content object System.out.println( - "Content '" + ct.getName() + "' has no values."); + "Content '" + entry.getKey() + "' has no values."); } } }