incubator-any23-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ans...@apache.org
Subject svn commit: r1372269 [1/4] - in /incubator/any23/trunk: ./ api/ api/src/ api/src/main/ api/src/main/java/ api/src/main/java/org/ api/src/main/java/org/apache/ api/src/main/java/org/apache/any23/ api/src/main/java/org/apache/any23/cli/ api/src/main/java...
Date Mon, 13 Aug 2012 06:15:33 GMT
Author: ansell
Date: Mon Aug 13 06:15:29 2012
New Revision: 1372269

URL: http://svn.apache.org/viewvc?rev=1372269&view=rev
Log:
ANY23-114 : move classes out to api module

Added:
    incubator/any23/trunk/api/
    incubator/any23/trunk/api/pom.xml
    incubator/any23/trunk/api/src/
    incubator/any23/trunk/api/src/main/
    incubator/any23/trunk/api/src/main/java/
    incubator/any23/trunk/api/src/main/java/org/
    incubator/any23/trunk/api/src/main/java/org/apache/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/cli/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/cli/Tool.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/Configuration.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/ModifiableConfiguration.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/package-info.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/EncodingDetector.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/package-info.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionContext.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionException.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionResult.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/Extractor.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/IssueReport.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMEType.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMETypeDetector.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/Purifier.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/package-info.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/plugin/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/plugin/Author.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/plugin/ExtractorPlugin.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/plugin/package-info.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/rdf/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/rdf/Prefixes.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/source/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/source/DocumentSource.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/CSV.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/DCTERMS.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/DOAC.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/FOAF.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/GEO.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/HLISTING.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/HRECIPE.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/ICAL.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/OGP.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/REVIEW.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SCHEMAORG.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SINDICE.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/VCARD.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/WO.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XFN.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XHTML.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/package-info.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/
    incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/FormatWriter.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandler.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandlerException.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactory.java
    incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java
Removed:
    incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Tool.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/configuration/Configuration.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/configuration/ModifiableConfiguration.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/configuration/package-info.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/encoding/EncodingDetector.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/encoding/package-info.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionContext.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionException.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionParameters.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractionResult.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/Extractor.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorDescription.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorFactory.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/IssueReport.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/mime/MIMEType.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/mime/MIMETypeDetector.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/mime/purifier/Purifier.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/mime/purifier/package-info.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/plugin/Author.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/plugin/ExtractorPlugin.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/plugin/package-info.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/rdf/Prefixes.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/source/DocumentSource.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/CSV.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/DCTERMS.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/DOAC.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/FOAF.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/GEO.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/HLISTING.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/HRECIPE.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/ICAL.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/OGP.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/REVIEW.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/SCHEMAORG.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/SINDICE.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/VCARD.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/Vocabulary.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/WO.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/XFN.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/XHTML.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/vocab/package-info.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/FormatWriter.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/TripleHandler.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/TripleHandlerException.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/Writer.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/WriterRegistry.java
Modified:
    incubator/any23/trunk/core/pom.xml
    incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExampleInputOutput.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriter.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NQuadsWriter.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NTriplesWriter.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/RDFXMLWriter.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/TriXWriter.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/TurtleWriter.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/URIListWriter.java
    incubator/any23/trunk/core/src/test/java/org/apache/any23/AbstractAny23TestBase.java
    incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/SingleDocumentExtractionTest.java
    incubator/any23/trunk/core/src/test/java/org/apache/any23/writer/WriterRegistryTest.java
    incubator/any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
    incubator/any23/trunk/pom.xml
    incubator/any23/trunk/service/src/main/java/org/apache/any23/servlet/WebResponder.java
    incubator/any23/trunk/test-resources/pom.xml

Added: incubator/any23/trunk/api/pom.xml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/pom.xml?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/pom.xml (added)
+++ incubator/any23/trunk/api/pom.xml Mon Aug 13 06:15:29 2012
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <artifactId>apache-any23</artifactId>
+    <groupId>org.apache.any23</groupId>
+    <version>0.7.1-incubating-SNAPSHOT</version>
+    <relativePath>../</relativePath>
+  </parent>
+  <artifactId>apache-any23-api</artifactId>
+  <name>Apache Any23 :: Base API</name>
+  <dependencies>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-model</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.openrdf.sesame</groupId>
+      <artifactId>sesame-rio-api</artifactId>
+    </dependency>
+  </dependencies>
+</project>

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/cli/Tool.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/cli/Tool.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/cli/Tool.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/cli/Tool.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+/**
+ * Defines a runnable <i>CLI</i> tool.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public interface Tool {
+
+    /**
+     * Runs the tool. No exit code is returned: the method is declared <code>void</code>.
+     *
+     * @throws Exception if an error occurs while running the tool.
+     */
+    void run() throws Exception;
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/Configuration.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/Configuration.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/Configuration.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/Configuration.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.configuration;
+
+/**
+ * Defines the main <i>Any23</i> configuration.
+ */
+public interface Configuration {
+
+    /**
+     * Returns all the defined configuration properties.
+     *
+     * @return names of the defined properties.
+     */
+    String[] getProperties();
+
+    /**
+     * Checks whether a property is defined or not in configuration.
+     *
+     * @param propertyName name of property to check.
+     * @return <code>true</code> if defined, <code>false</code> otherwise.
+     */
+    boolean defineProperty(String propertyName);
+
+    /**
+     * Returns the value of a specified property, or the default value if property is not defined.
+     *
+     * @param propertyName name of property
+     * @param defaultValue default value if not found.
+     * @return the value associated to <i>propertyName</i>.
+     */
+    String getProperty(String propertyName, String defaultValue);
+
+    /**
+     * Returns the value of the specified <code>propertyName</code> or raises an exception
+     * if <code>propertyName</code> is not defined.
+     *
+     * @param propertyName name of property to be returned.
+     * @return property value.
+     * @throws IllegalArgumentException if the property name is not defined
+     *                                  or the found property value is blank or empty.
+     */
+    String getPropertyOrFail(String propertyName);
+
+    /**
+     * Returns the {@link Integer} value of the specified <code>propertyName</code> or raises an exception
+     * if <code>propertyName</code> is not defined.
+     *
+     * @param propertyName name of property to be returned.
+     * @return property value.
+     * @throws IllegalArgumentException if the property name is not defined
+     *                                  or the found property value is blank or empty.
+     * @throws NumberFormatException if the found property value is not a valid {@link Integer}.
+     */
+    int getPropertyIntOrFail(String propertyName);
+
+    /**
+     * Returns the value of a <i>flag property</i>. Such properties can assume only two values:
+     * <ul>
+     *     <li><code>on</code>  if flag is active   (<code>true</code> is returned).
+     *     <li><code>off</code> if flag is inactive (<code>false</code> is returned).
+     * </ul>
+     *
+     * @param propertyName name of property flag.
+     * @return <code>true</code> for <code>on</code>, <code>false</code> for <code>off</code>.
+     * @throws IllegalArgumentException if <code>propertyName</code> is not declared or its value is not a valid flag.
+     */
+    boolean getFlagProperty(final String propertyName);
+
+    /**
+     * Returns a human readable string containing the configuration dump.
+     *
+     * @return a string describing the configuration options.
+     */
+    String getConfigurationDump();
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.configuration;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * Default implementation of {@link Configuration}.
+ * The default property values are read from the
+ * <i>/default-configuration.properties</i> properties file
+ * in classpath; a JVM system property with the same name overrides a declared value.
+ *
+ * @author Michele Mostarda (michele.mostarda@gmail.com)
+ */
+public class DefaultConfiguration implements Configuration {
+
+    /**
+     * Classpath location of the default configuration file.
+     */
+    public static final String DEFAULT_CONFIG_FILE = "/default-configuration.properties";
+
+    public static final String FLAG_PROPERTY_ON  = "on"; // value of an active flag property
+
+    public static final String FLAG_PROPERTY_OFF = "off"; // value of an inactive flag property
+
+    protected static final Logger logger = LoggerFactory.getLogger(DefaultConfiguration.class);
+
+    protected static final DefaultConfiguration singleton = new DefaultConfiguration(); // built eagerly from the classpath defaults
+
+    protected final Properties properties; // backing table of declared properties
+
+    /**
+     * @return the singleton configuration instance.
+     *         This class exposes no method to modify it.
+     */
+    public static synchronized DefaultConfiguration singleton() {
+        return singleton;
+    }
+
+    /**
+     * @return a modifiable configuration backed by a clone of the singleton's properties.
+     */
+    public static synchronized ModifiableConfiguration copy() {
+        final Properties propertiesCopy = (Properties) singleton.properties.clone(); // cloned so edits do not reach the singleton's table
+        return new DefaultModifiableConfiguration(propertiesCopy);
+    }
+
+    private static Properties loadDefaultProperties() {
+        final Properties properties = new Properties();
+        try {
+            properties.load( DefaultConfiguration.class.getResourceAsStream(DEFAULT_CONFIG_FILE) ); // NOTE(review): the stream is never closed, and it is null when the resource is missing
+        } catch (IOException ioe) {
+            throw new IllegalStateException("Error while loading default configuration.", ioe);
+        }
+        return properties;
+    }
+
+    protected DefaultConfiguration(Properties properties) {
+        this.properties = properties; // stored by reference, no defensive copy
+    }
+
+    private DefaultConfiguration() {
+        this( loadDefaultProperties() );
+    }
+
+    public synchronized String[] getProperties() {
+        return properties.keySet().toArray( new String[properties.size()] );
+    }
+
+    public synchronized boolean defineProperty(String propertyName) {
+        return properties.containsKey(propertyName);
+    }
+
+    public synchronized String getProperty(String propertyName, String defaultValue) {
+        final String value = getPropertyValue(propertyName);
+        if(value == null) {
+            return defaultValue;
+        }
+        return value;
+    }
+
+    public synchronized String getPropertyOrFail(String propertyName) {
+        final String propertyValue = getPropertyValue(propertyName);
+        if(propertyValue == null) {
+            throw new IllegalArgumentException("The property '" + propertyName + "' is expected to be declared.");
+        }
+        if(  propertyValue.trim().length() == 0) { // blank values are rejected as well
+            throw new IllegalArgumentException(
+                    "Invalid value '" + propertyValue + "' for property '" + propertyName + "'"
+            );
+        }
+        return propertyValue; // returned untrimmed
+    }
+
+    public synchronized int getPropertyIntOrFail(String propertyName) {
+        final String value = getPropertyOrFail(propertyName);
+        final String trimValue = value.trim();
+        try {
+            return Integer.parseInt(trimValue);
+        } catch (NumberFormatException nfe) {
+            throw new NumberFormatException("The retrieved property is not a valid Integer: '" + trimValue + "'"); // NOTE(review): the original exception is not chained
+        }
+    }
+
+    public synchronized boolean getFlagProperty(final String propertyName) {
+        final String value = getPropertyOrFail(propertyName);
+        if(value == null) { // NOTE(review): unreachable, getPropertyOrFail throws instead of returning null
+            return false;
+        }
+        if(FLAG_PROPERTY_ON.equals(value)) { // exact match: the value is not trimmed or case-folded
+            return true;
+        }
+        if(FLAG_PROPERTY_OFF.equals(value)) {
+            return false;
+        }
+        throw new IllegalArgumentException(
+                String.format(
+                    "Invalid value [%s] for flag property [%s]. Supported values are %s|%s",
+                    value, propertyName, FLAG_PROPERTY_ON, FLAG_PROPERTY_OFF
+                )
+        );
+    }
+
+    public synchronized String getConfigurationDump() {
+        final String[] defaultProperties = getProperties();
+        final StringBuilder sb = new StringBuilder();
+        sb.append("\n======================= Configuration Properties =======================\n");
+        for (String defaultProperty : defaultProperties) {
+            sb.append(defaultProperty).append('=').append(getPropertyValue(defaultProperty)).append('\n'); // effective value, including system overrides
+        }
+        sb.append("========================================================================\n");
+        return sb.toString();
+    }
+
+    private String getPropertyValue(String propertyName) {
+        if( ! defineProperty(propertyName) ) {
+            if(logger.isDebugEnabled()) {
+                logger.debug(
+                        String.format(
+                                "Property '%s' is not declared in default configuration file [%s]",
+                                propertyName,
+                                DEFAULT_CONFIG_FILE
+                        )
+                );
+            }
+            return null; // undeclared names are never resolved, even if a system property with that name exists
+        }
+        final String systemValue = System.getProperties().getProperty(propertyName); // a system property with the same name takes precedence
+        if(systemValue == null) {
+            return properties.getProperty(propertyName);
+        }
+        return systemValue;
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.configuration;
+
+import java.util.Properties;
+
+/**
+ * Default implementation of {@link ModifiableConfiguration}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class DefaultModifiableConfiguration extends DefaultConfiguration implements ModifiableConfiguration{
+
+    protected DefaultModifiableConfiguration(Properties properties) {
+        super(properties);
+    }
+
+    public synchronized String setProperty(String propertyName, String propertyValue) {
+        if( ! defineProperty(propertyName) ) throw new IllegalArgumentException( // only already-declared properties may be overwritten
+                String.format("Property '%s' is not defined in configuration.", propertyName)
+        );
+        return (String) properties.setProperty(propertyName, propertyValue); // Properties.setProperty returns the previous value
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/ModifiableConfiguration.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/ModifiableConfiguration.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/ModifiableConfiguration.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/ModifiableConfiguration.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.configuration;
+
+/**
+ * Modifiable extension of {@link Configuration}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public interface ModifiableConfiguration extends Configuration {
+
+    /**
+     * Sets the new value <code>propertyValue</code> for
+     * the property named <code>propertyName</code>.
+     *
+     * @param propertyName name of property.
+     * @param propertyValue value of property.
+     * @return the old property value.
+     */
+    String setProperty(String propertyName, String propertyValue);
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/package-info.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/package-info.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/package-info.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/configuration/package-info.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package contains the <i>Any23</i> <i>Configuration</i> definition.
+ */
+package org.apache.any23.configuration;
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/EncodingDetector.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/EncodingDetector.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/EncodingDetector.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/EncodingDetector.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.encoding;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Defines a detector for <i>charset encoding</i>.
+ *
+ * @author Michele Mostarda ( michele.mostarda@gmail.com )
+ */
+public interface EncodingDetector {
+
+    /**
+     * Guesses the data encoding.
+     *
+     * @param input the input stream containing the data.
+     * @return a string compliant to
+     *         <a href="http://www.iana.org/assignments/character-sets">IANA Charset Specification</a>.
+     */
+    String guessEncoding(InputStream input) throws IOException;
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/package-info.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/package-info.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/package-info.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/encoding/package-info.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package defines utility classes for the <i>Encoding Detection</i>.
+ */
+package org.apache.any23.encoding;

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionContext.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionContext.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionContext.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionContext.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.openrdf.model.URI;
+
+/**
+ * This class provides the context for the processing of
+ * a single {@link Extractor}.
+ * <p>
+ * Two contexts are equal when their unique IDs are equal. The unique ID is built
+ * from the extractor name, the local ID and the document URI; the default
+ * language is not part of it.
+ */
+public class ExtractionContext {
+
+    /**
+     * Local ID used by the constructors that do not receive an explicit one.
+     */
+    public static final String ROOT_EXTRACTION_RESULT_ID = "root-extraction-result-id";
+
+    /**
+     * Name of the extractor.
+     */
+    private final String extractorName;
+
+    /**
+     * URI of the document.
+     */
+    private final URI documentURI;
+
+    /**
+     * The document default language, can be <code>null</code>.
+     */
+    private final String defaultLanguage;
+
+    /**
+     * ID identifying the document.
+     */
+    private final String uniqueID;
+
+    /**
+     * Constructor.
+     *
+     * @param extractorName name of the extractor, cannot be <code>null</code>.
+     * @param documentURI URI of the document, cannot be <code>null</code>.
+     * @param defaultLanguage the document default language, can be <code>null</code>.
+     * @param localID local part of the unique ID, a <code>null</code> value is
+     *        handled as an empty string.
+     * @throws NullPointerException if the extractor name or the document URI is <code>null</code>.
+     */
+    public ExtractionContext(String extractorName, URI documentURI, String defaultLanguage, String localID) {
+        checkNotNull(extractorName, "extractor name");
+        checkNotNull(documentURI  , "document URI");
+        this.extractorName   = extractorName;
+        this.documentURI     = documentURI;
+        this.defaultLanguage = defaultLanguage;
+        this.uniqueID        =
+                "urn:x-any23:" + extractorName + ":" +
+                (localID == null ? "" : localID) + ":" + documentURI;
+    }
+
+    /**
+     * Constructor using {@link #ROOT_EXTRACTION_RESULT_ID} as local ID.
+     *
+     * @param extractorName name of the extractor, cannot be <code>null</code>.
+     * @param documentURI URI of the document, cannot be <code>null</code>.
+     * @param defaultLanguage the document default language, can be <code>null</code>.
+     */
+    public ExtractionContext(String extractorName, URI documentURI, String defaultLanguage) {
+        this(extractorName, documentURI, defaultLanguage, ROOT_EXTRACTION_RESULT_ID);
+    }
+
+    /**
+     * Constructor using {@link #ROOT_EXTRACTION_RESULT_ID} as local ID
+     * and no default language.
+     *
+     * @param extractorName name of the extractor, cannot be <code>null</code>.
+     * @param documentURI URI of the document, cannot be <code>null</code>.
+     */
+    public ExtractionContext(String extractorName, URI documentURI) {
+        this(extractorName, documentURI, null);
+    }
+
+    /**
+     * Creates a new context with the same extractor name, document URI and
+     * default language of this one, but a different local ID.
+     *
+     * @param localID the local ID of the new context.
+     * @return a new context instance.
+     */
+    public ExtractionContext copy(String localID) {
+        return new ExtractionContext(
+                getExtractorName(),
+                getDocumentURI(),
+                getDefaultLanguage(),
+                localID
+        );
+    }
+
+    /**
+     * @return the name of the extractor.
+     */
+    public String getExtractorName() {
+        return extractorName;
+    }
+
+    /**
+     * @return the URI of the document.
+     */
+    public URI getDocumentURI() {
+        return documentURI;
+    }
+
+    /**
+     * @return the document default language, can be <code>null</code>.
+     */
+    public String getDefaultLanguage() {
+        return defaultLanguage;
+    }
+
+    /**
+     * @return the ID identifying this context.
+     */
+    public String getUniqueID() {
+        return uniqueID;
+    }
+
+    @Override
+    public int hashCode() {
+        return uniqueID.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) return true;
+        if (!(other instanceof ExtractionContext)) return false;
+        return ((ExtractionContext) other).uniqueID.equals(uniqueID);
+    }
+
+    @Override
+    public String toString() {
+        return "ExtractionContext(" + uniqueID + ")";
+    }
+
+    // Static: it is invoked by the constructor before the instance is fully initialized.
+    private static void checkNotNull(Object data, String desc) {
+        if(data == null) throw new NullPointerException(desc + " cannot be null.");
+    }
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionException.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionException.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionException.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionException.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import java.io.PrintStream;
+import java.io.PrintWriter;
+
+/**
+ * Defines a specific exception raised during the metadata extraction phase.
+ *
+ * @author Michele Mostarda (michele.mostarda@gmail.com)
+ */
+public class ExtractionException extends Exception {
+
+    /**
+     * Extraction result printed ahead of the stack trace,
+     * <code>null</code> when not provided.
+     */
+    private ExtractionResult extractionResult;
+
+    /**
+     * @param message the error message.
+     */
+    public ExtractionException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message the error message.
+     * @param cause the cause of this exception.
+     */
+    public ExtractionException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message the error message.
+     * @param cause the cause of this exception.
+     * @param er the extraction result to be printed with the stack trace.
+     */
+    public ExtractionException(String message, Throwable cause, ExtractionResult er) {
+        super(message, cause);
+        this.extractionResult = er;
+    }
+
+    @Override
+    public void printStackTrace(PrintStream ps) {
+        final PrintWriter contextWriter = new PrintWriter(ps);
+        printExceptionResult(contextWriter);
+        super.printStackTrace(ps);
+    }
+
+    @Override
+    public void printStackTrace(PrintWriter pw) {
+        printExceptionResult(pw);
+        super.printStackTrace(pw);
+    }
+
+    /**
+     * Prints the extraction result, when available, between two marker lines
+     * and flushes the writer.
+     *
+     * @param out the writer receiving the report.
+     */
+    private void printExceptionResult(PrintWriter out) {
+        if(extractionResult != null) {
+            out.println();
+            out.println("------------ BEGIN Exception context ------------");
+            final String report = extractionResult.toString();
+            out.print(report);
+            out.println("------------ END   Exception context ------------");
+            out.println();
+            out.flush();
+        }
+    }
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.apache.any23.configuration.Configuration;
+import org.apache.any23.configuration.DefaultConfiguration;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This class models the parameters to be used to perform an extraction.
+ *
+ * @see org.apache.any23.Any23
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ExtractionParameters {
+
+    /**
+     * @param c the underlying configuration.
+     * @return the default extraction parameters.
+     */
+    public static final ExtractionParameters newDefault(Configuration c) {
+        return new ExtractionParameters(c, ValidationMode.None);
+    }
+
+    /**
+     * Creates the default extraction parameters with {@link org.apache.any23.configuration.DefaultConfiguration}.
+     *
+     * @return the default extraction parameters.
+     */
+    public static final ExtractionParameters newDefault() {
+        return new ExtractionParameters(DefaultConfiguration.singleton(), ValidationMode.None);
+    }
+
+    /**
+     * Declares the supported validation actions.
+     */
+    public enum ValidationMode {
+        None,
+        Validate,
+        ValidateAndFix
+    }
+
+    private final Configuration configuration;
+
+    private final ValidationMode extractionMode;
+
+    private final Map<String, Boolean> extractionFlags;
+
+    private final Map<String,String> extractionProperties;
+
+    public static final String METADATA_DOMAIN_PER_ENTITY_FLAG  = "any23.extraction.metadata.domain.per.entity";
+
+    public static final String METADATA_NESTING_FLAG            = "any23.extraction.metadata.nesting";
+
+    public static final String METADATA_TIMESIZE_FLAG           = "any23.extraction.metadata.timesize";
+
+    public static final String EXTRACTION_CONTEXT_URI_PROPERTY = "any23.extraction.context.uri";
+
+    /**
+     * Constructor.
+     *
+     * @param configuration underlying configuration.
+     * @param extractionMode specifies the required extraction mode.
+     * @param extractionFlags map of specific flags used for extraction. If not specified they will
+     *        be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
+     * @param extractionProperties map of specific properties used for extraction. If not specified
+     *        they will be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
+     */
+    public ExtractionParameters(
+            Configuration configuration,
+            ValidationMode extractionMode,
+            Map<String, Boolean> extractionFlags,
+            Map<String,String> extractionProperties
+    ) {
+        if(configuration == null) {
+            throw new NullPointerException("Configuration cannot be null.");
+        }
+        if(extractionMode == null) {
+            throw new NullPointerException("Extraction mode cannot be null.");
+        }
+        this.configuration  = configuration;
+        this.extractionMode = extractionMode;
+        this.extractionFlags =
+                extractionFlags == null
+                        ?
+                new HashMap<String,Boolean>()
+                        :
+                new HashMap<String,Boolean>(extractionFlags);
+        this.extractionProperties =
+                extractionProperties == null
+                        ?
+                new HashMap<String,String>()
+                        :
+                new HashMap<String,String>(extractionProperties);
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param configuration underlying configuration.
+     * @param extractionMode specifies the required extraction mode.
+     */
+    public ExtractionParameters(Configuration configuration, ValidationMode extractionMode) {
+        this(configuration, extractionMode, null, null);
+    }
+
+    /**
+     * Constructor, allows to set explicitly the value for flag
+     * {@link #METADATA_NESTING_FLAG}.
+     *
+     * @param configuration the underlying configuration.
+     * @param extractionMode specifies the required extraction mode.
+     * @param nesting if <code>true</code> nesting triples will be expressed.
+     */
+    public ExtractionParameters(Configuration configuration, ValidationMode extractionMode, final boolean nesting) {
+        // A singleton map is enough: the delegate constructor copies the given flags into its own HashMap.
+        this(
+                configuration,
+                extractionMode,
+                java.util.Collections.<String, Boolean>singletonMap(
+                        ExtractionParameters.METADATA_NESTING_FLAG, nesting
+                ),
+                null
+        );
+    }
+
+    /**
+     * @return <code>true</code> if validation is active.
+     */
+    public boolean isValidate() {
+        return extractionMode == ValidationMode.Validate || extractionMode == ValidationMode.ValidateAndFix;
+    }
+
+    /**
+     * @return <code>true</code> if fix is active.
+     */
+    public boolean isFix() {
+        return extractionMode == ValidationMode.ValidateAndFix;
+    }
+
+    /**
+     * Returns the value of the specified extraction flag, if the flag is undefined
+     * it will be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
+     *
+     * @param flagName name of flag.
+     * @return flag value.
+     */
+    public boolean getFlag(String flagName) {
+        final Boolean value = extractionFlags.get(flagName);
+        if(value == null) {
+            return configuration.getFlagProperty(flagName);
+        }
+        return value;
+    }
+
+    /**
+     * Sets the value for an extraction flag.
+     *
+     * @param flagName flag name.
+     * @param value new flag value.
+     * @return the previous flag value.
+     */
+    public Boolean setFlag(String flagName, boolean value) {
+        checkPropertyExists(flagName);
+        validateValue("flag name", flagName);
+        return extractionFlags.put(flagName, value);
+    }
+
+    /**
+     * Returns the value of the specified extraction property, if the property is undefined
+     * it will be retrieved by the default {@link org.apache.any23.configuration.Configuration}.
+     *
+     * @param propertyName the property name.
+     * @return the property value.
+     * @throws IllegalArgumentException if the property name is not defined in configuration.
+     */
+    public String getProperty(String propertyName) {
+        final String propertyValue = extractionProperties.get(propertyName);
+        if(propertyValue == null) {
+            return configuration.getPropertyOrFail(propertyName);
+        }
+        return propertyValue;
+    }
+
+    /**
+     * Sets the value for an extraction property.
+     *
+     * @param propertyName the property name.
+     * @param propertyValue the property value.
+     * @return the previous property value.
+     */
+    public String setProperty(String propertyName, String propertyValue) {
+        checkPropertyExists(propertyName);
+        validateValue("property name" , propertyName);
+        validateValue("property value", propertyValue);
+        return extractionProperties.put(propertyName, propertyValue);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if(obj == null) {
+            return false;
+        }
+        if(obj == this) {
+            return true;
+        }
+        if(obj instanceof ExtractionParameters) {
+            ExtractionParameters other = (ExtractionParameters) obj;
+            return
+                    extractionMode == other.extractionMode
+                            &&
+                    extractionFlags.equals( other.extractionFlags)
+                            &&
+                    extractionProperties.equals( other.extractionProperties );
+        }
+        return false;
+    }
+
+    @Override
+    public int hashCode() {
+        return extractionMode.hashCode() * 2 * extractionFlags.hashCode() * 3 * extractionProperties.hashCode() * 5;
+    }
+
+    private void checkPropertyExists(String propertyName) {
+        if(! configuration.defineProperty(propertyName) ) {
+            throw new IllegalArgumentException(
+                    String.format("Property '%s' is unknown and cannot be set.", propertyName)
+            );
+        }
+    }
+
+    private void validateValue(String desc, String value) {
+        if(value == null || value.trim().length() == 0)
+            throw new IllegalArgumentException( String.format("Invalid %s: '%s'", desc, value) );
+    }
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionResult.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionResult.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionResult.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractionResult.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.openrdf.model.Resource;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
+
+/**
+ * Interface defining the methods that a representation of an extraction result must have.
+ */
+public interface ExtractionResult extends IssueReport {
+
+    /**
+     * Writes a triple.
+     * Parameters can be null, then the triple will be silently ignored.
+     *
+     * @param s subject
+     * @param p predicate
+     * @param o object
+     * @param g graph
+     */
+    void writeTriple(Resource s, URI p, Value o, URI g);
+
+    /**
+     * Write a triple.
+     * Parameters can be null, then the triple will be silently ignored.
+     *
+     * @param s subject
+     * @param p predicate
+     * @param o object
+     */
+    void writeTriple(Resource s, URI p, Value o);
+
+    /**
+     * Write a namespace.
+     *
+     * @param prefix the prefix of the namespace
+     * @param uri    the long URI identifying the namespace
+     */
+    void writeNamespace(String prefix, String uri);
+
+    /**
+     * Close the result.
+     * <p/>
+     * Extractors should close their results as soon as possible, but
+     * don't have to, the environment will close any remaining ones.
+     * Implementations should be robust against multiple close()
+     * invocations.
+     */
+    void close();
+
+    /**
+     * Open a result nested in the current one.
+     *
+     * @param extractionContext the context to be used to open the sub result.
+     * @return the instance of the nested extraction result.
+     */
+    ExtractionResult openSubResult(ExtractionContext extractionContext);
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/Extractor.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/Extractor.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/Extractor.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/Extractor.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.openrdf.model.URI;
+import org.w3c.dom.Document;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * It defines the signature of a generic Extractor.
+ *
+ * @param <Input> the type of the input data to be processed.
+ */
+public interface Extractor<Input> {
+
+    /**
+     * This interface specializes an {@link Extractor} able to handle
+     * {@link org.openrdf.model.URI} as input format. Use it if you need to fetch a document before the extraction.
+     */
+    public interface BlindExtractor extends Extractor<URI> {
+    }
+
+    /**
+     * This interface specializes an {@link Extractor} able to handle
+     * {@link java.io.InputStream} as input format.
+     */
+    public interface ContentExtractor extends Extractor<InputStream> {
+        
+        /**
+         * If <code>true</code>, the extractor will stop at first parsing error,
+         * if <code>false</code> the extractor will attempt to ignore all parsing errors.
+         *
+         * @param f tolerance flag.
+         */
+        void setStopAtFirstError(boolean f);
+
+    }
+
+    /**
+     * This interface specializes an {@link Extractor} able to handle
+     * {@link org.w3c.dom.Document} as input format.
+     */
+    public interface TagSoupDOMExtractor extends Extractor<Document> {
+    }
+
+    /**
+     * Executes the extractor. Will be invoked only once, extractors are
+     * not reusable.
+     *
+     * @param extractionParameters the parameters to be applied during the extraction.
+     * @param context The document context.
+     * @param in The extractor input data.
+     * @param out the collector for the extracted data.
+     * @throws IOException On error while reading from the input stream.
+     * @throws ExtractionException On other error, such as parse errors.
+     */
+    void run(ExtractionParameters extractionParameters, ExtractionContext context, Input in, ExtractionResult out)
+    throws IOException, ExtractionException;
+
+    /**
+     * Returns a {@link ExtractorDescription} of this extractor.
+     *
+     * @return the object representing the extractor description.
+     */
+    ExtractorDescription getDescription();
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ *
+ * It defines a minimal signature for an
+ * {@link Extractor} description.
+ *
+ */
+public interface ExtractorDescription {
+
+    /**
+     * Returns the name of the extractor.
+     *
+     * @return a name.
+     */
+    String getExtractorName();
+
+    /**
+     * An instance defining the prefixes supported by this extractor.
+     *
+     * @return prefixes instance.
+     */
+    Prefixes getPrefixes();
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.apache.any23.mime.MIMEType;
+
+import java.util.Collection;
+
+/**
+ * Interface defining a factory for {@link Extractor}.
+ *
+ * @param <T> the type of the {@link Extractor} to be created by this factory.
+ */
+public interface ExtractorFactory<T extends Extractor<?>> extends ExtractorDescription {
+
+    /**
+     * Returns the extractor type.
+     *
+     * @return the not <code>null</code> extractor class.
+     */
+    Class<T> getExtractorType();
+
+    /**
+     * Creates an extractor instance.
+     *
+     * @return an instance of the extractor associated to this factory.
+     */
+    T createExtractor();
+
+    /**
+     * Supports wildcards, e.g. <code>"*&#47;*"</code> for blind extractors that merely call a web service.
+     */
+    Collection<MIMEType> getSupportedMIMETypes();
+
+    /**
+     * An example input file for the extractor, to be used in auto-generated
+     * documentation. For the {@link Extractor.BlindExtractor},
+     * this is an arbitrary URI.
+     * For extractors that require content, it is the name of a file, relative
+     * to the factory's class file's location, it will be opened using
+     * factory.getClass().getResourceAsStream(filename). The example should be
+     * a short file that produces characteristic output if sent through the
+     * extractor. The file will be read as UTF-8, so it should either use that
+     * encoding or avoid characters outside of the US-ASCII range.
+     */
+    String getExampleInput();
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.apache.any23.mime.MIMEType;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * It simple models a group of {@link ExtractorFactory} providing
+ * simple accessing methods.
+ */
+public class ExtractorGroup implements Iterable<ExtractorFactory<?>> {
+
+    private final Collection<ExtractorFactory<?>> factories;
+
+    public ExtractorGroup(Collection<ExtractorFactory<?>> factories) {
+        this.factories = factories;
+    }
+
+    public boolean isEmpty() {
+        return factories.isEmpty();
+    }
+
+    public int getNumOfExtractors() {
+        return factories.size();
+    }
+
+    /**
+     * Returns a {@link ExtractorGroup} with a set of {@link Extractor} able to
+     * process the provided mime type.
+     * 
+     * @param mimeType to perform the selection.
+     * @return an {@link ExtractorGroup} able to process the provided mime type.
+     */
+    public ExtractorGroup filterByMIMEType(MIMEType mimeType) {
+        // @@@ wildcards, q values
+        Collection<ExtractorFactory<?>> matching = new ArrayList<ExtractorFactory<?>>();
+        for (ExtractorFactory<?> factory : factories) {
+            if (supportsAllContentTypes(factory) || supports(factory, mimeType)) {
+                matching.add(factory);
+            }
+        }
+        return new ExtractorGroup(matching);
+    }
+
+    public Iterator<ExtractorFactory<?>> iterator() {
+        return factories.iterator();
+    }
+
+    /**
+     * @return <code>true</code> if all the {@link Extractor} contained in the group
+     * supports all the content types.
+     */
+    public boolean allExtractorsSupportAllContentTypes() {
+        for (ExtractorFactory<?> factory : factories) {
+            if (!supportsAllContentTypes(factory)) return false;
+        }
+        return true;
+    }
+
+    private boolean supportsAllContentTypes(ExtractorFactory<?> factory) {
+        return factory.getSupportedMIMETypes().contains("*/*");
+    }
+
+    private boolean supports(ExtractorFactory<?> factory, MIMEType mimeType) {
+        for (MIMEType supported : factory.getSupportedMIMETypes()) {
+            if (supported.isAnyMajorType()) return true;
+            if (supported.isAnySubtype() && supported.getMajorType().equals(mimeType.getMajorType())) return true;
+            if (supported.getFullType().equals(mimeType.getFullType())) return true;
+        }
+        return false;
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import java.util.List;
+
+/**
+ * An interface that enables a registry for extractors to be implemented by
+ * different implementors of this API.
+ * 
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+public interface ExtractorRegistry {
+
+    /**
+     * Registers an {@link ExtractorFactory}.
+     * 
+     * @param factory
+     *            The {@link ExtractorFactory} to be registered.
+     * @throws IllegalArgumentException
+     *             if trying to register a {@link ExtractorFactory} that already
+     *             exists in the registry.
+     */
+    void register(ExtractorFactory<?> factory);
+
+    /**
+     * 
+     * Retrieves a {@link ExtractorFactory} given its name.
+     * 
+     * @param name
+     *            The name of the desired factory
+     * @return The {@link ExtractorFactory} associated to the provided name
+     * @throws IllegalArgumentException
+     *             If there is not an {@link ExtractorFactory} associated to the
+     *             provided name.
+     */
+    ExtractorFactory<?> getFactory(String name);
+
+    /**
+     * @return An {@link ExtractorGroup} with all the registered
+     *         {@link Extractor}.
+     */
+    ExtractorGroup getExtractorGroup();
+
+    /**
+     * Returns an {@link ExtractorGroup} containing the {@link ExtractorFactory}
+     * matching the names provided as input.
+     * 
+     * @param names
+     *            A {@link java.util.List} containing the names of the desired
+     *            {@link ExtractorFactory}.
+     * @return the extraction group.
+     */
+    ExtractorGroup getExtractorGroup(List<String> names);
+
+    /**
+     * Checks whether a name is known to the registry.
+     * 
+     * @param name
+     *            The name of the {@link ExtractorFactory}
+     * @return <code>true</code> if there is a {@link ExtractorFactory}
+     *         associated to the provided name.
+     */
+    boolean isRegisteredName(String name);
+
+    /**
+     * Returns the names of all registered extractors, sorted alphabetically.
+     * 
+     * @return the alphabetically sorted list of registered names.
+     */
+    List<String> getAllNames();
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/IssueReport.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/IssueReport.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/IssueReport.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/extractor/IssueReport.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import java.io.PrintStream;
+import java.util.Collection;
+
+/**
+ * This interface models an issue reporter.
+ *
+ * @author Michele Mostarda (michele.mostarda@gmail.com)
+ */
+public interface IssueReport {
+
+    /**
+     * Notifies an issue occurred while performing an extraction on an input stream.
+     *
+     * @param level issue level.
+     * @param msg   issue message.
+     * @param row   issue row.
+     * @param col   issue column.
+     */
+    void notifyIssue(IssueLevel level, String msg, int row, int col);
+
+    /**
+     * Prints out the content of the report.
+     *
+     * @param ps the stream the report is printed to.
+     */
+    void printReport(PrintStream ps);
+
+    /**
+     * Returns all the collected issues.
+     *
+     * @return a collection of {@link org.apache.any23.extractor.IssueReport.Issue}s.
+     */
+    Collection<Issue> getIssues();
+
+    /**
+     * Possible issue levels.
+     */
+    enum IssueLevel {
+        Warning,
+        Error,
+        Fatal
+    }
+
+    /**
+     * This class defines a generic issue traced by this extraction result.
+     * Instances are immutable: every field is assigned once by the constructor.
+     */
+    public class Issue {
+
+        private final IssueLevel level;
+        private final String     message;
+        private final int        row;
+        private final int        col;
+
+        /**
+         * Creates an issue.
+         *
+         * @param l   issue level.
+         * @param msg issue message.
+         * @param r   issue row.
+         * @param c   issue column.
+         */
+        Issue(IssueLevel l, String msg, int r, int c) {
+            level = l;
+            message = msg;
+            row = r;
+            col = c;
+        }
+
+        /**
+         * @return the level of this issue.
+         */
+        public IssueLevel getLevel() {
+            return level;
+        }
+
+        /**
+         * @return the message of this issue.
+         */
+        public String getMessage() {
+            return message;
+        }
+
+        /**
+         * @return the row this issue refers to.
+         */
+        public int getRow() {
+            return row;
+        }
+
+        /**
+         * @return the column this issue refers to.
+         */
+        public int getCol() {
+            return col;
+        }
+
+        /**
+         * @return the issue rendered as <code>level: 'message' (row,col)</code>,
+         *         with tab separators between the three parts.
+         */
+        @Override
+        public String toString() {
+            return String.format("%s: \t'%s' \t(%d,%d)", level, message, row, col);
+        }
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMEType.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMEType.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMEType.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMEType.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.mime;
+
+/**
+ * A MIME type with an optional q (quality) value.
+ * <p>
+ * Instances are immutable and are obtained through {@link #parse(String)}.
+ * A wildcard major type or subtype is stored internally as <code>null</code>
+ * and rendered as <code>*</code> by the accessors.
+ *
+ * @author Richard Cyganiak (richard@cyganiak.de)
+ */
+public class MIMEType implements Comparable<MIMEType> {
+
+    private final static String MSG = "Cannot parse MIME type (expected type/subtype[;q=x.y] format): ";
+
+    private final String type;
+
+    private final String subtype;
+
+    private final double q;
+
+    /**
+     * Parses the given MIME type string returning an instance of
+     * {@link MIMEType}.
+     * The expected format for <code>mimeType</code> is
+     * <code>type/subtype[;q=x.y]</code> .
+     * An example of valid mime type is: <code>application/rdf+xml;q=0.9</code>
+     * <p>
+     * Type, subtype and parameter names are trimmed and lower-cased
+     * independently of the default locale. A quality value that cannot be
+     * parsed is ignored; a quality value that is not strictly between
+     * <code>0</code> and <code>1</code> (including <code>NaN</code>) is
+     * replaced by <code>1.0</code>.
+     *
+     * @param mimeType the string to be parsed, may be <code>null</code>.
+     * @return the mime type instance, or <code>null</code> if
+     *         <code>mimeType</code> is <code>null</code>.
+     * @throws IllegalArgumentException if the <code>mimeType</code> is not well formatted.
+     */
+    public static MIMEType parse(String mimeType) {
+        if (mimeType == null) {
+            return null;
+        }
+        int i = mimeType.indexOf(';');
+        double q = 1.0;
+        if (i > -1) {
+            String[] params = mimeType.substring(i + 1).split(";");
+            for (String param : params) {
+                int i2 = param.indexOf('=');
+                if (i2 == -1) {
+                    continue;
+                }
+                if (!"q".equals(normalize(param.substring(0, i2)))) {
+                    continue;
+                }
+                String value = param.substring(i2 + 1);
+                try {
+                    q = Double.parseDouble(value);
+                } catch (NumberFormatException ignored) {
+                    // Malformed quality value: keep the quality collected so far.
+                    continue;
+                }
+                // Written as a negated range check so that NaN, for which every
+                // comparison is false, also falls back to the default quality.
+                if (!(q > 0.0 && q < 1.0)) {
+                    q = 1.0;
+                }
+            }
+        } else {
+            i = mimeType.length();
+        }
+        String type = mimeType.substring(0, i);
+        int i2 = type.indexOf('/');
+        if (i2 == -1) {
+            throw new IllegalArgumentException(MSG + mimeType);
+        }
+        String p1 = normalize(type.substring(0, i2));
+        String p2 = normalize(type.substring(i2 + 1));
+        if ("*".equals(p1)) {
+            // A wildcard major type is only valid together with a wildcard subtype.
+            if (!"*".equals(p2)) {
+                throw new IllegalArgumentException(MSG + mimeType);
+            }
+            return new MIMEType(null, null, q);
+        }
+        if ("*".equals(p2)) {
+            return new MIMEType(p1, null, q);
+        }
+        return new MIMEType(p1, p2, q);
+    }
+
+    /**
+     * Trims and lower-cases a token without depending on the default locale,
+     * so that e.g. <code>APPLICATION</code> is not turned into a dotless-i
+     * variant under a Turkish locale.
+     *
+     * @param token the token to be normalized.
+     * @return the trimmed, lower-cased token.
+     */
+    private static String normalize(String token) {
+        return token.trim().toLowerCase(java.util.Locale.ROOT);
+    }
+
+    private MIMEType(String type, String subtype, double q) {
+        this.type = type;
+        this.subtype = subtype;
+        this.q = q;
+    }
+
+    /**
+     * @return the major type, or <code>*</code> if any major type is accepted.
+     */
+    public String getMajorType() {
+        return (type == null ? "*" : type);
+    }
+
+    /**
+     * @return the subtype, or <code>*</code> if any subtype is accepted.
+     */
+    public String getSubtype() {
+        return (subtype == null ? "*" : subtype);
+    }
+
+    /**
+     * @return the major type and the subtype joined by a slash, without quality.
+     */
+    public String getFullType() {
+        return getMajorType() + "/" + getSubtype();
+    }
+
+    /**
+     * @return the quality value, <code>1.0</code> if none was specified.
+     */
+    public double getQuality() {
+        return q;
+    }
+
+    /**
+     * @return <code>true</code> if the major type is the wildcard.
+     */
+    public boolean isAnyMajorType() {
+        return type == null;
+    }
+
+    /**
+     * @return <code>true</code> if the subtype is the wildcard.
+     */
+    public boolean isAnySubtype() {
+        return subtype == null;
+    }
+
+    /**
+     * @return the full type, followed by <code>;q=</code> and the quality
+     *         when the quality differs from <code>1.0</code>.
+     */
+    @Override
+    public String toString() {
+        if (q == 1.0) {
+            return getFullType();
+        }
+        return getFullType() + ";q=" + q;
+    }
+
+    /**
+     * Orders MIME types by their full type; the quality value is ignored.
+     *
+     * @param other the MIME type to compare with.
+     * @return the result of comparing the two full type strings.
+     */
+    public int compareTo(MIMEType other) {
+        return getFullType().compareTo(other.getFullType());
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMETypeDetector.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMETypeDetector.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMETypeDetector.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/MIMETypeDetector.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.mime;
+
+import java.io.InputStream;
+
+/**
+ * This detector is able to estimate the <code>MIME</code> type of
+ * some given raw data.
+ */
+public interface MIMETypeDetector {
+
+    /**
+     * Estimates the <code>MIME</code> type of the content of an input file.
+     *
+     * @param fileName name of the file.
+     * @param input content of the file.
+     * @param mimeTypeFromMetadata MIME type declared in metadata.
+     * @return the supposed MIME type or <code>null</code> if nothing appropriate is found.
+     */
+    public MIMEType guessMIMEType(String fileName, InputStream input, MIMEType mimeTypeFromMetadata);
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/Purifier.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/Purifier.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/Purifier.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/Purifier.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.mime.purifier;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * This interface defines a minimum set of methods that
+ * a {@link org.apache.any23.mime.TikaMIMETypeDetector} could
+ * call in order to clean the input before performing the <i>MIME type</i>
+ * detection.
+ * 
+ * @author Davide Palmisano ( dpalmisano@gmail.com )
+ */
+public interface Purifier {
+
+    /**
+     * Performs the purification of the provided resettable {@link java.io.InputStream}.
+     * 
+     * @param inputStream a resettable {@link java.io.InputStream} to be cleaned.
+     * @throws IOException presumably if the stream cannot be read or reset;
+     *         confirm against the implementations.
+     */
+    void purify(InputStream inputStream) throws IOException;
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/package-info.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/package-info.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/package-info.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/mime/purifier/package-info.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package contains all the logic to implement your own {@link org.apache.any23.mime.purifier.Purifier}.
+ * Each {@link org.apache.any23.mime.purifier.Purifier} is responsible for modifying the file <b>before</b>
+ * its <i>MIME type</i> is detected.
+ */
+package org.apache.any23.mime.purifier;
\ No newline at end of file



Mime
View raw message