incubator-any23-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ans...@apache.org
Subject svn commit: r1372269 [3/4] - in /incubator/any23/trunk: ./ api/ api/src/ api/src/main/ api/src/main/java/ api/src/main/java/org/ api/src/main/java/org/apache/ api/src/main/java/org/apache/any23/ api/src/main/java/org/apache/any23/cli/ api/src/main/java...
Date Mon, 13 Aug 2012 06:15:33 GMT
Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/REVIEW.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/REVIEW.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/REVIEW.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/REVIEW.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary definitions from vocabularies/review.rdf
+ * (namespace <code>http://purl.org/stuff/rev#</code>).
+ *
+ * <p>The shared instance is obtained through {@link #getInstance()}.
+ * Properties are registered with <code>createProperty</code> and classes
+ * with <code>createClass</code>, so that they are reported by
+ * {@link Vocabulary#getProperties()} and {@link Vocabulary#getClasses()}
+ * respectively.</p>
+ */
+public class REVIEW extends Vocabulary {
+
+    /**
+     * Lazily created shared instance; <code>null</code> until the first
+     * call to {@link #getInstance()}.
+     */
+    private static REVIEW instance;
+
+    /**
+     * Returns the shared instance of this vocabulary, creating it on the
+     * first invocation. The lazy initialization is not synchronized.
+     *
+     * @return the shared {@link REVIEW} instance.
+     */
+    public static REVIEW getInstance() {
+        if(instance == null) {
+            instance = new REVIEW();
+        }
+        return instance;
+    }
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://purl.org/stuff/rev#";
+
+    /**
+     * The namespace of the vocabulary as a URI.
+     */
+    public final URI NAMESPACE = createURI(NS);
+
+    /**
+     * The commenter on the review.
+     */
+    public final URI commenter =  createProperty("commenter");
+
+    /**
+     * Used to associate a review with a comment on the review.
+     */
+    public final URI hasComment = createProperty("hasComment");
+
+    /**
+     * Associates a review with a feedback on the review.
+     */
+    public final URI hasFeedback = createProperty("hasFeedback");
+
+    /**
+     * Associates a work with a review.
+     */
+    public final URI hasReview = createProperty("hasReview");
+
+    /**
+     * A numeric value.
+     */
+    public final URI maxRating = createProperty("maxRating");
+
+    /**
+     * A numeric value.
+     */
+    public final URI minRating = createProperty("minRating");
+
+    /**
+     * Number of positive usefulness votes (integer).
+     */
+    public final URI positiveVotes = createProperty("positiveVotes");
+
+    /**
+     * A numeric value.
+     */
+    public final URI rating = createProperty("rating");
+
+    /**
+     * The person that has written the review.
+     */
+    public final URI reviewer = createProperty("reviewer");
+
+    /**
+     * The text of the review.
+     */
+    public final URI text = createProperty("text");
+
+    /**
+     * The title of the review.
+     */
+    public final URI title = createProperty("title");
+
+    /**
+     * Number of usefulness votes (integer).
+     */
+    public final URI totalVotes = createProperty("totalVotes");
+
+    /**
+     * The type of media of a work under review.
+     */
+    public final URI type = createProperty("type");
+
+    /**
+     * A comment on a review. This is a class of the vocabulary, hence it is
+     * registered among the classes and not among the properties.
+     */
+    public final URI Comment = createClass("Comment");
+
+    /**
+     * Feedback on the review. Expresses whether the review was useful or not.
+     * This is a class of the vocabulary.
+     */
+    public final URI Feedback = createClass("Feedback");
+
+    /**
+     * A review of a work. This is a class of the vocabulary.
+     */
+    public final URI Review = createClass("Review");
+
+    /**
+     * Creates a class URI within the {@link #NS} namespace and registers it
+     * among the classes of this vocabulary.
+     *
+     * @param localName local name of the class.
+     * @return the created class URI.
+     */
+    private URI createClass(String localName) {
+        return createClass(NS, localName);
+    }
+
+    /**
+     * Creates a property URI within the {@link #NS} namespace and registers it
+     * among the properties of this vocabulary.
+     *
+     * @param localName local name of the property.
+     * @return the created property URI.
+     */
+    private URI createProperty(String localName) {
+        return createProperty(NS, localName);
+    }
+
+    /**
+     * Private constructor: instances are handed out by {@link #getInstance()}.
+     */
+    private REVIEW(){
+        super(NS);
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SCHEMAORG.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SCHEMAORG.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SCHEMAORG.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SCHEMAORG.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+/**
+ * Vocabulary definition for <a href="http://schema.org/">schema.org</a>.
+ *
+ * <p>This class only carries the namespace: it registers no class and no
+ * property of its own.</p>
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class SCHEMAORG extends Vocabulary {
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://schema.org/";
+
+    /**
+     * Lazily created shared instance; <code>null</code> until the first
+     * call to {@link #getInstance()}.
+     */
+    private static SCHEMAORG instance;
+
+    /**
+     * Returns the shared instance of this vocabulary, creating it on the
+     * first invocation. The lazy initialization is not synchronized.
+     *
+     * @return the shared {@link SCHEMAORG} instance.
+     */
+    public static SCHEMAORG getInstance() {
+        if (instance != null) {
+            return instance;
+        }
+        instance = new SCHEMAORG();
+        return instance;
+    }
+
+    /**
+     * Private constructor: instances are handed out by {@link #getInstance()}.
+     */
+    private SCHEMAORG() {
+        super(NS);
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SINDICE.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SINDICE.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SINDICE.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/SINDICE.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * This class models an internal <i>Sindice</i> Vocabulary to describe
+ * resource domains and Microformat nesting relationships.
+ * See the <a href="http://developers.any23.org/extraction.html">Any23 extraction notes</a>.
+ *
+ * <p>The local name of every property is exposed as a string constant and
+ * the corresponding URI as an instance field of the shared instance returned
+ * by {@link #getInstance()}.</p>
+ *
+ * @author Davide Palmisano (dpalmisano@gmail.com)
+ * @author Michele Mostarda (michele.mostarda@gmail.com)
+ */
+public class SINDICE extends Vocabulary {
+
+    /** Local name of the {@link #domain} property. */
+    public static final String DOMAIN = "domain";
+
+    /** Local name of the {@link #nesting} property. */
+    public static final String NESTING = "nesting";
+
+    /** Local name of the {@link #nesting_original} property. */
+    public static final String NESTING_ORIGINAL = "nesting_original";
+
+    /** Local name of the {@link #nesting_structured} property. */
+    public static final String NESTING_STRUCTURED = "nesting_structured";
+
+    /** Local name of the {@link #size} property. */
+    public static final String SIZE = "size";
+
+    /** Local name of the {@link #date} property. */
+    public static final String DATE = "date";
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://vocab.sindice.net/any23#";
+
+    /**
+     * Lazily created shared instance; <code>null</code> until the first
+     * call to {@link #getInstance()}.
+     */
+    private static SINDICE instance;
+
+    /**
+     * Returns the shared instance of this vocabulary, creating it on the
+     * first invocation. The lazy initialization is not synchronized.
+     *
+     * @return the shared {@link SINDICE} instance.
+     */
+    public static SINDICE getInstance() {
+        if (instance != null) {
+            return instance;
+        }
+        instance = new SINDICE();
+        return instance;
+    }
+
+    /**
+     * The namespace of the vocabulary as a URI.
+     */
+    public final URI NAMESPACE = createURI(NS);
+
+    /**
+     * This property expresses the DNS domain of the resource on which
+     * it is applied. It is intended to be used to keep track of the domain provenance
+     * of each resource.
+     */
+    public final URI domain = createProperty(DOMAIN);
+
+    /**
+     * This property links a resource with a <i>blank node</i> that represents
+     * a nested <i>Microformat</i> node.
+     */
+    public final URI nesting = createProperty(NESTING);
+
+    /**
+     * This property is used to keep track of the original nested <i>RDF property</i>.
+     */
+    public final URI nesting_original = createProperty(NESTING_ORIGINAL);
+
+    /**
+     * This property links the resource with a <i>node</i> representing the
+     * nested <i>Microformat</i>.
+     */
+    public final URI nesting_structured = createProperty(NESTING_STRUCTURED);
+
+    /**
+     * Size meta property indicating the number of triples within the returned dataset.
+     */
+    public final URI size = createProperty(SIZE);
+
+    /**
+     * Date meta property indicating the data generation time.
+     */
+    public final URI date = createProperty(DATE);
+
+    /**
+     * Creates a class URI within the {@link #NS} namespace and registers it
+     * among the classes of this vocabulary. Currently not used by any field
+     * declared in this class.
+     *
+     * @param localName local name of the class.
+     * @return the created class URI.
+     */
+    private URI createClass(String localName) {
+        return createClass(NS, localName);
+    }
+
+    /**
+     * Creates a property URI within the {@link #NS} namespace and registers it
+     * among the properties of this vocabulary.
+     *
+     * @param localName local name of the property.
+     * @return the created property URI.
+     */
+    private URI createProperty(String localName) {
+        return createProperty(NS, localName);
+    }
+
+    /**
+     * Private constructor: instances are handed out by {@link #getInstance()}.
+     */
+    private SINDICE() {
+        super(NS);
+    }
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/VCARD.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/VCARD.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/VCARD.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/VCARD.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary definitions from vcard.owl
+ * (namespace <code>http://www.w3.org/2006/vcard/ns#</code>).
+ *
+ * <p>Properties are registered through <code>createProperty</code> and are
+ * therefore reported by {@link Vocabulary#getProperties()}. The class URIs
+ * ({@link #Address}, {@link #Telephone}, {@link #Location}, {@link #Name},
+ * {@link #Organization}, {@link #VCard}) are created with
+ * <code>createURI</code> and are not registered in the class map.</p>
+ */
+public class VCARD extends Vocabulary {
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://www.w3.org/2006/vcard/ns#";
+
+    /**
+     * Lazily created shared instance; <code>null</code> until the first
+     * call to {@link #getInstance()}.
+     */
+    private static VCARD instance;
+
+    /**
+     * Returns the shared instance of this vocabulary, creating it on the
+     * first invocation. The lazy initialization is not synchronized.
+     *
+     * @return the shared {@link VCARD} instance.
+     */
+    public static VCARD getInstance() {
+        if(instance == null) {
+            instance = new VCARD();
+        }
+        return instance;
+    }
+
+    /**
+     * The namespace of the vocabulary as a URI.
+     */
+    public final URI NAMESPACE = createURI(NS);
+
+    /**
+     * An additional part of a person's name.
+     */
+    public final URI additional_name = createProperty("additional-name");
+
+    /**
+     * A postal or street address of a person.
+     */
+    public final URI adr = createProperty("adr");
+
+    /**
+     * A person that acts as one's agent.
+     */
+    public final URI agent = createProperty("agent");
+
+    /**
+     * The birthday of a person.
+     */
+    public final URI bday = createProperty("bday");
+
+    /**
+     * A category of a vCard.
+     */
+    public final URI category = createProperty("category");
+
+    /**
+     * A class (e.g., public, private, etc.) of a vCard.
+     */
+    public final URI class_ = createProperty("class");
+
+    /**
+     * The country of a postal address.
+     */
+    public final URI country_name = createProperty("country-name");
+
+    /**
+     * An email address.
+     */
+    public final URI email = createProperty("email");
+
+    /**
+     * The extended address of a postal address.
+     */
+    public final URI extended_address = createProperty("extended-address");
+
+    /**
+     * A family name part of a person's name.
+     */
+    public final URI family_name = createProperty("family-name");
+
+    /**
+     * A fax number of a person.
+     */
+    public final URI fax = createProperty("fax");
+
+    /**
+     * A formatted name of a person.
+     */
+    public final URI fn = createProperty("fn");
+
+    /**
+     * A geographic location associated with a person.
+     */
+    public final URI geo = createProperty("geo");
+
+    /**
+     * A given name part of a person's name.
+     */
+    public final URI given_name = createProperty("given-name");
+
+    /**
+     * A home address of a person.
+     */
+    public final URI homeAdr = createProperty("homeAdr");
+
+    /**
+     * A home phone number of a person.
+     */
+    public final URI homeTel = createProperty("homeTel");
+
+    /**
+     * An honorific prefix part of a person's name.
+     */
+    public final URI honorific_prefix = createProperty("honorific-prefix");
+
+    /**
+     * An honorific suffix part of a person's name.
+     */
+    public final URI honorific_suffix = createProperty("honorific-suffix");
+
+    /**
+     * A key (e.g, PKI key) of a person.
+     */
+    public final URI key = createProperty("key");
+
+    /**
+     * The formatted version of a postal address (a string with embedded line breaks,
+     * punctuation, etc.).
+     */
+    public final URI label = createProperty("label");
+
+    /**
+     * The latitude of a geographic location.
+     */
+    public final URI latitude = createProperty("latitude");
+
+    /**
+     * The locality (e.g., city) of a postal address.
+     */
+    public final URI locality = createProperty("locality");
+
+    /**
+     * A logo associated with a person or their organization.
+     */
+    public final URI logo = createProperty("logo");
+
+    /**
+     * The longitude of a geographic location.
+     */
+    public final URI longitude = createProperty("longitude");
+
+    /**
+     * A mailer associated with a vCard.
+     */
+    public final URI mailer = createProperty("mailer");
+
+    /**
+     * A mobile email address of a person.
+     */
+    public final URI mobileEmail = createProperty("mobileEmail");
+
+    /**
+     * A mobile phone number of a person.
+     */
+    public final URI mobileTel = createProperty("mobileTel");
+
+    /**
+     * The components of the name of a person.
+     */
+    public final URI n = createProperty("n");
+
+    /**
+     * The nickname of a person.
+     */
+    public final URI nickname = createProperty("nickname");
+
+    /**
+     * Notes about a person on a vCard.
+     */
+    public final URI note = createProperty("note");
+
+    /**
+     * An organization associated with a person.
+     */
+    public final URI org = createProperty("org");
+
+    /**
+     * The name of an organization.
+     */
+    public final URI organization_name = createProperty("organization-name");
+
+    /**
+     * The name of a unit within an organization.
+     */
+    public final URI organization_unit = createProperty("organization-unit");
+
+    /**
+     * An email address unaffiliated with any particular organization or employer;
+     * a personal email address.
+     */
+    public final URI personalEmail = createProperty("personalEmail");
+
+    /**
+     * A photograph of a person.
+     */
+    public final URI photo = createProperty("photo");
+
+    /**
+     * The post office box of a postal address.
+     */
+    public final URI post_office_box = createProperty("post-office-box");
+
+    /**
+     * The postal code (e.g., U.S. ZIP code) of a postal address.
+     */
+    public final URI postal_code = createProperty("postal-code");
+
+    /**
+     * The region (e.g., state or province) of a postal address.
+     */
+    public final URI region = createProperty("region");
+
+    /**
+     * The timestamp of a revision of a vCard.
+     */
+    public final URI rev = createProperty("rev");
+
+    /**
+     * A role a person plays within an organization.
+     */
+    public final URI role = createProperty("role");
+
+    /**
+     * A version of a person's name suitable for collation.
+     */
+    public final URI sort_string = createProperty("sort-string");
+
+    /**
+     * A sound (e.g., a greeting or pronunciation) of a person.
+     */
+    public final URI sound = createProperty("sound");
+
+    /**
+     * The street address of a postal address.
+     */
+    public final URI street_address = createProperty("street-address");
+
+    /**
+     * A telephone number of a person.
+     */
+    public final URI tel = createProperty("tel");
+
+    /**
+     * A person's title.
+     */
+    public final URI title = createProperty("title");
+
+    /**
+     * A timezone associated with a person.
+     */
+    public final URI tz = createProperty("tz");
+
+    /**
+     * A UID of a person's vCard.
+     */
+    public final URI uid = createProperty("uid");
+
+    /**
+     * An (explicitly) unlabeled address of a person.
+     */
+    public final URI unlabeledAdr = createProperty("unlabeledAdr");
+
+    /**
+     * An (explicitly) unlabeled email address of a person.
+     */
+    public final URI unlabeledEmail = createProperty("unlabeledEmail");
+
+    /**
+     * An (explicitly) unlabeled phone number of a person.
+     */
+    public final URI unlabeledTel = createProperty("unlabeledTel");
+
+    /**
+     * A URL associated with a person.
+     */
+    public final URI url = createProperty("url");
+
+    /**
+     * A work address of a person.
+     */
+    public final URI workAdr = createProperty("workAdr");
+
+    /**
+     * A work email address of a person.
+     */
+    public final URI workEmail = createProperty("workEmail");
+
+    /**
+     * A work phone number of a person.
+     */
+    public final URI workTel = createProperty("workTel");
+
+    /**
+     * Resources that are vCard (postal) addresses.
+     */
+    public final URI Address = createURI("http://www.w3.org/2006/vcard/ns#Address");
+
+    /**
+     * The <code>addressType</code> property.
+     * NOTE(review): no description is given for this term in this file;
+     * presumably it qualifies the kind of an {@link #Address} - confirm.
+     */
+    public final URI addressType = createProperty("addressType");
+
+    /**
+     * Resources that are vCard Telephones.
+     * The URI must differ from {@link #Address}, otherwise telephone
+     * resources cannot be told apart from postal addresses.
+     */
+    public final URI Telephone = createURI("http://www.w3.org/2006/vcard/ns#Telephone");
+
+    /**
+     * Resources that are vCard geographic locations.
+     */
+    public final URI Location = createURI("http://www.w3.org/2006/vcard/ns#Location");
+
+    /**
+     * Resources that are vCard personal names.
+     */
+    public final URI Name = createURI("http://www.w3.org/2006/vcard/ns#Name");
+
+    /**
+     * Resources that are vCard organizations.
+     */
+    public final URI Organization = createURI("http://www.w3.org/2006/vcard/ns#Organization");
+
+    /**
+     * Resources that are vCards
+     */
+    public final URI VCard = createURI("http://www.w3.org/2006/vcard/ns#VCard");
+
+    /**
+     * Creates a property URI within the {@link #NS} namespace and registers it
+     * among the properties of this vocabulary.
+     *
+     * @param localName local name of the property.
+     * @return the created property URI.
+     */
+    private URI createProperty(String localName) {
+        return createProperty(NS, localName);
+    }
+
+    /**
+     * Creates a new vocabulary instance bound to the {@link #NS} namespace.
+     * The constructor is public, so instances other than the one returned by
+     * {@link #getInstance()} can be created; prefer {@link #getInstance()}.
+     */
+    public VCARD(){
+        super(NS);
+    }
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/Vocabulary.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/Vocabulary.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/Vocabulary.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+import org.openrdf.model.impl.ValueFactoryImpl;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+import java.lang.reflect.Field;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+/**
+ * Base class for the definition of a vocabulary.
+ *
+ * <p>Subclasses register their terms through
+ * {@link #createClass(String, String)} and
+ * {@link #createProperty(String, String)}; registered terms can then be
+ * looked up by name or listed as a whole. A vocabulary that registers no
+ * term behaves as an empty one.</p>
+ *
+ * @author Michele Mostarda ( michele.mostarda@gmail.com )
+ * @version $Id$
+ */
+public abstract class Vocabulary {
+
+    /**
+     * Allows to add comments to <code>namespaces</code>,
+     * <code>classes</code> and <code>properties</code>.
+     */
+    @Target({FIELD})
+    @Retention(RUNTIME)
+    @interface Comment {
+        String value();
+    }
+
+    /**
+     * Vocabulary namespace.
+     */
+    private final URI namespace;
+
+    /**
+     * Map of vocabulary resources, keyed by the name they were registered with.
+     * Created eagerly so that lookups on a vocabulary without classes do not
+     * fail with a <code>NullPointerException</code>.
+     */
+    private final Map<String,URI> classes = new HashMap<String, URI>(10);
+
+    /**
+     * Map of vocabulary properties, keyed by the name they were registered with.
+     * Created eagerly so that lookups on a vocabulary without properties do not
+     * fail with a <code>NullPointerException</code>.
+     */
+    private final Map<String,URI> properties = new HashMap<String, URI>(10);
+
+    /**
+     * Map any resource with the relative comment.
+     * Filled on first use by {@link #fillResourceToCommentMap()}.
+     */
+    private Map<URI,String> resourceToCommentMap;
+
+    /**
+     * Constructor.
+     *
+     * @param namespace the namespace URI prefix.
+     * @throws IllegalArgumentException if the namespace cannot be converted
+     *         to a URI.
+     */
+    public Vocabulary(String namespace) {
+        try {
+            this.namespace = ValueFactoryImpl.getInstance().createURI(namespace);
+        } catch (Exception e) {
+            throw new IllegalArgumentException("Invalid namespace '" + namespace + "'", e);
+        }
+    }
+
+    /**
+     * @return the namespace associated to this vocabulary.
+     */
+    public URI getNamespace() {
+        return namespace;
+    }
+
+    /**
+     * Returns a class defined within this vocabulary.
+     *
+     * @param name class name.
+     * @return the URI associated to such resource.
+     * @throws IllegalArgumentException if no class is registered with the
+     *         given name.
+     */
+    public URI getClass(String name) {
+        final URI res = classes.get(name);
+        if (null == res) {
+            throw new IllegalArgumentException("Unknown resource name '" + name + "'");
+        }
+        return res;
+    }
+
+    /**
+     * Returns a property defined within this vocabulary.
+     *
+     * @param name property name.
+     * @return the URI associated to such property.
+     * @throws IllegalArgumentException if no property is registered with the
+     *         given name.
+     */
+    public URI getProperty(String name) {
+        final URI prop = properties.get(name);
+        if (null == prop) {
+            throw new IllegalArgumentException("Unknown property name '" + name + "'");
+        }
+        return prop;
+    }
+
+    /**
+     * Returns a property defined within this vocabulary, if not found the
+     * <code>defaultValue</code> will be returned.
+     *
+     * @param name property name.
+     * @param defaultValue the default value if property name not found.
+     * @return the URI associated to such property.
+     */
+    public URI getProperty(String name, URI defaultValue) {
+        final URI prop = properties.get(name);
+        return null == prop ? defaultValue : prop;
+    }
+
+    /**
+     * Returns the property URI for the specified property string.
+     * If the string contains a list of words separated by non-word
+     * characters (anything matching the regular expression <code>\W</code>),
+     * such words are merged and camel case separated: the first word is kept
+     * as it is, the first character of every following word is upper-cased.
+     * Empty words, produced by consecutive separators, are ignored.
+     *
+     * @param property property name.
+     * @return property URI.
+     * @throws IllegalArgumentException if the resulting camel case name is
+     *         not a registered property.
+     */
+    public URI getPropertyCamelCase(String property) {
+        final String[] names = property.split("\\W");
+        // split() returns an empty array when the input holds only separators.
+        final StringBuilder camelCase = new StringBuilder(names.length > 0 ? names[0] : "");
+        for (int i = 1; i < names.length; i++) {
+            final String word = names[i];
+            if (word.length() == 0) {
+                continue;
+            }
+            // Character.toUpperCase is locale independent, unlike String.toUpperCase().
+            camelCase.append(Character.toUpperCase(word.charAt(0)));
+            camelCase.append(word, 1, word.length());
+        }
+        return getProperty(camelCase.toString());
+    }
+
+    /**
+     * @return the list of all defined classes, empty if none is defined.
+     */
+    public URI[] getClasses() {
+        final Collection<URI> uris = classes.values();
+        return uris.toArray( new URI[ uris.size() ] );
+    }
+
+    /**
+     * @return the list of all defined properties, empty if none is defined.
+     */
+    public URI[] getProperties() {
+        final Collection<URI> uris = properties.values();
+        return uris.toArray( new URI[ uris.size() ] );
+    }
+
+    /**
+     * Returns all the defined comments for resources.
+     *
+     * @return unmodifiable map from resource to comment.
+     */
+    public Map<URI,String> getComments() {
+        fillResourceToCommentMap();
+        return Collections.unmodifiableMap(resourceToCommentMap);
+    }
+
+    /**
+     * Returns the comment for the given resource.
+     *
+     * @param resource input resource to have a comment.
+     * @return the human readable comment associated to the
+     *         given resource, <code>null</code> if there is none.
+     */
+    public String getCommentFor(URI resource) {
+        fillResourceToCommentMap();
+        return resourceToCommentMap.get(resource);
+    }
+
+    /**
+     * Creates a URI.
+     *
+     * @param uriStr the URI string
+     * @return the URI instance.
+     */
+    protected URI createURI(String uriStr) {
+        return ValueFactoryImpl.getInstance().createURI(uriStr);
+    }
+
+    /**
+     * Creates a resource and register it to the {@link #classes} map.
+     *
+     * @param namespace vocabulary namespace.
+     * @param resource name of the resource.
+     * @return the created resource URI.
+     */
+    protected URI createClass(String namespace, String resource) {
+        final URI res = createURI(namespace, resource);
+        classes.put(resource, res);
+        return res;
+    }
+
+    /**
+     * Creates a property and register it to the {@link #properties} map.
+     *
+     * @param namespace vocabulary namespace.
+     * @param property name of the property.
+     * @return the created property URI.
+     */
+    protected URI createProperty(String namespace, String property) {
+        final URI res = createURI(namespace, property);
+        properties.put(property, res);
+        return res;
+    }
+
+    /**
+     * Creates a URI from a namespace and a local name.
+     *
+     * @param namespace the namespace part of the URI.
+     * @param localName the local name part of the URI.
+     * @return the URI instance.
+     */
+    private URI createURI(String namespace, String localName) {
+        return ValueFactoryImpl.getInstance().createURI(namespace, localName);
+    }
+
+    /**
+     * Builds {@link #resourceToCommentMap} on first use, by reading the
+     * {@link Comment} annotation of every public field of the concrete
+     * vocabulary class whose value is a URI. Subsequent calls are no-ops.
+     */
+    private void fillResourceToCommentMap() {
+        if(resourceToCommentMap != null) return;
+        final Map<URI,String> newMap = new HashMap<URI, String>();
+        for (Field field : this.getClass().getFields()) {
+            try {
+                final Object value = field.get(this);
+                if(value instanceof URI) {
+                    final Comment comment = field.getAnnotation(Comment.class);
+                    if(comment != null) newMap.put((URI) value, comment.value());
+                }
+            } catch (IllegalAccessException iae) {
+                throw new RuntimeException("Error while creating resource to comment map.", iae);
+            }
+        }
+        resourceToCommentMap = newMap;
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/WO.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/WO.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/WO.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/WO.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * This class models the <a href="http://purl.org/ontology/wo/">BBC Wildlife Ontology</a>.
+ * <p>
+ * Every term is exposed as a public final instance field that is created,
+ * through the <code>createClass</code> / <code>createProperty</code> helpers,
+ * while the singleton is being constructed. Fields whose name ends in
+ * <code>Class</code> (plus {@link #family}) are created as classes, the
+ * remaining terms as properties.
+ *
+ * @author Davide Palmisano (dpalmisano@gmail.com)
+ */
+public class WO extends Vocabulary {
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://purl.org/ontology/wo/";
+
+    /**
+     * Lazily created singleton, see {@link #getInstance()}.
+     */
+    private static WO instance;
+
+    /**
+     * Returns the singleton instance, creating it on the first call.
+     * The lazy initialization is not synchronized.
+     *
+     * @return the unique {@link WO} instance.
+     */
+    public static WO getInstance() {
+        if(instance == null) {
+            instance = new WO();
+        }
+        return instance;
+    }
+
+    /**
+     * The namespace of the vocabulary as a URI.
+     */
+    public final URI NAMESPACE = createURI(NS);
+
+    /**
+     * Generic class defining a biological species.
+     * NOTE(review): this term is registered with <code>createProperty</code>
+     * under the lower-case local name <code>species</code>, although the
+     * description above talks about a class - confirm against the ontology.
+     */
+    public final URI species = createProperty("species");
+
+    /**
+     * The <code>Kingdom</code> class.
+     */
+    public final URI kingdomClass = createClass("Kingdom");
+
+    /**
+     * The <code>Division</code> class.
+     */
+    public final URI divisionClass = createClass("Division");
+
+    /**
+     * The <code>Phylum</code> class.
+     */
+    public final URI phylumClass = createClass("Phylum");
+
+    /**
+     * The <code>Order</code> class.
+     */
+    public final URI orderClass = createClass("Order");
+
+    /**
+     * The <code>Genus</code> class.
+     */
+    public final URI genusClass = createClass("Genus");
+
+    /**
+     * The <code>Class</code> class (the taxonomic rank named "class").
+     */
+    public final URI classClass = createClass("Class");
+
+    /**
+     * A family is a scientific grouping of closely related organisms.
+     * It has smaller groups, called genera and species, within it.
+     * A family can have a lot of members or only a few.
+     * Examples of families include the cats (Felidae), the gulls (Laridae) and the grasses (Poaceae).
+     * Unlike the other class fields this one has no <code>Class</code> suffix;
+     * the homonymous property is {@link #familyProperty}.
+     */
+    public final URI family = createClass("Family");
+
+    /**
+     * Associates a taxon rank with a family.
+     */
+    public final URI familyProperty = createProperty("family");
+
+    /**
+     * Used to specify the name of a family as part of a Taxon Name.
+     */
+    public final URI familyName = createProperty("familyName");
+
+    /**
+     * Specifies the species part of a binomial name, allowing
+     * this portion of the name to be explicitly described.
+     * Therefore this property will typically only be used in TaxonNames
+     * associated with species. The property is largely provided as a
+     * convenience to avoid applications having to parse the binomial name.
+     */
+    public final URI speciesName = createProperty("speciesName");
+
+    /**
+     * Specifies the scientific name of a species, allowing
+     * this portion of the name to be explicitly described.
+     * Therefore this property will typically only be used in TaxonNames
+     * associated with species. The property is largely provided as a
+     * convenience to avoid applications having to parse the binomial name.
+     */
+    public final URI scientificName = createProperty("scientificName");
+
+    /**
+     * The <code>kingdom</code> property.
+     */
+    public final URI kingdom = createProperty("kingdom");
+
+    /**
+     * The <code>phylum</code> property.
+     */
+    public final URI phylum = createProperty("phylum");
+
+    /**
+     * The <code>order</code> property.
+     */
+    public final URI order = createProperty("order");
+
+    /**
+     * The <code>genus</code> property.
+     */
+    public final URI genus = createProperty("genus");
+
+    /**
+     * The <code>division</code> property.
+     */
+    public final URI division = createProperty("division");
+
+    /**
+     * The <code>class</code> property; the field is called <code>clazz</code>
+     * because <code>class</code> is a reserved word in Java.
+     */
+    public final URI clazz = createProperty("class");
+
+    /**
+     * The <code>kingdomName</code> property.
+     */
+    public final URI kingdomName = createProperty("kingdomName");
+
+    /**
+     * The <code>phylumName</code> property.
+     */
+    public final URI phylumName = createProperty("phylumName");
+
+    /**
+     * The <code>orderName</code> property.
+     */
+    public final URI orderName = createProperty("orderName");
+
+    /**
+     * The <code>genusName</code> property.
+     */
+    public final URI genusName = createProperty("genusName");
+
+    /**
+     * The <code>divisionName</code> property.
+     */
+    public final URI divisionName = createProperty("divisionName");
+
+    /**
+     * The <code>className</code> property.
+     */
+    public final URI clazzName = createProperty("className");
+
+    /**
+     * Creates a class in the {@link #NS} namespace by delegating to the
+     * two-argument <code>createClass</code>.
+     *
+     * @param name local name of the class.
+     * @return the created class URI.
+     */
+    private URI createClass(String name) {
+        return createClass(NS, name);
+    }
+
+    /**
+     * Creates a property in the {@link #NS} namespace by delegating to the
+     * two-argument <code>createProperty</code>.
+     *
+     * @param name local name of the property.
+     * @return the created property URI.
+     */
+    private URI createProperty(String name) {
+        return createProperty(NS, name);
+    }
+
+    /**
+     * Private constructor: instances are obtained via {@link #getInstance()}.
+     * Passes {@link #NS} to the superclass constructor.
+     */
+    private WO(){
+        super(NS);
+    }
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XFN.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XFN.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XFN.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XFN.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Vocabulary class for <a href="http://gmpg.org/xfn/11">XFN</a>, as per
+ * <a href="http://vocab.sindice.com/xfn/guide.html">Expressing XFN in RDF</a>.
+ * <p>
+ * Every relationship declared through the one-argument
+ * {@link #createProperty(String)} is registered twice: once as the plain
+ * property (<code>NS + localName</code>) and once as its extended variant
+ * (<code>NS + localName + "-hyperlink"</code>). Both are indexed by the plain
+ * local name and can be looked up with {@link #getPropertyByLocalName(String)}
+ * and {@link #getExtendedProperty(String)} respectively.
+ *
+ * @author Richard Cyganiak (richard@cyganiak.de)
+ */
+public class XFN extends Vocabulary {
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://vocab.sindice.com/xfn#";
+
+    /**
+     * Lazily created singleton, see {@link #getInstance()}.
+     */
+    private static XFN instance;
+
+    /**
+     * Returns the singleton instance, creating it on the first call.
+     * The lazy initialization is not synchronized.
+     *
+     * @return the unique {@link XFN} instance.
+     */
+    public static XFN getInstance() {
+        if(instance == null) {
+            instance = new XFN();
+        }
+        return instance;
+    }
+
+    public final URI contact      = createProperty("contact");
+    public final URI acquaintance = createProperty("acquaintance");
+    public final URI friend       = createProperty("friend");
+    public final URI met          = createProperty("met");
+    public final URI coWorker     = createProperty("co-worker");
+    public final URI colleague    = createProperty("colleague");
+    public final URI coResident   = createProperty("co-resident");
+    public final URI neighbor     = createProperty("neighbor");
+    public final URI child        = createProperty("child");
+    public final URI parent       = createProperty("parent");
+    public final URI spouse       = createProperty("spouse");
+    public final URI kin          = createProperty("kin");
+    public final URI muse         = createProperty("muse");
+    public final URI crush        = createProperty("crush");
+    public final URI date         = createProperty("date");
+    public final URI sweetheart   = createProperty("sweetheart");
+    public final URI me           = createProperty("me");
+
+    /**
+     * Created through the two-argument <code>createProperty</code>, hence it
+     * is neither indexed by local name nor given a "-hyperlink" variant.
+     */
+    public final URI mePage = createProperty(NS, "mePage");
+
+    /*
+     * The two maps below are deliberately declared WITHOUT an initializer and
+     * are allocated lazily inside createProperty(String): the URI fields above
+     * are initialized first (textual order) and already populate the maps, so
+     * an initializer here would replace the populated maps with empty ones.
+     */
+
+    /**
+     * Local name -&gt; plain XFN property.
+     */
+    private Map<String, URI> peopleXFNProperties;
+
+    /**
+     * Local name -&gt; extended ("-hyperlink") XFN property.
+     */
+    private Map<String, URI> hyperlinkXFNProperties;
+
+    /**
+     * Looks up a plain XFN property.
+     *
+     * @param localName local name of the property, e.g. <code>"friend"</code>.
+     * @return the matching property or <code>null</code> if the local name is unknown.
+     */
+    public URI getPropertyByLocalName(String localName) {
+        return peopleXFNProperties.get(localName);
+    }
+
+    /**
+     * Looks up the extended ("-hyperlink") variant of an XFN property.
+     *
+     * @param localName plain local name of the property (without the
+     *        "-hyperlink" suffix), e.g. <code>"friend"</code>.
+     * @return the matching extended property or <code>null</code> if the local name is unknown.
+     */
+    public URI getExtendedProperty(String localName) {
+        return hyperlinkXFNProperties.get(localName);
+    }
+
+    /**
+     * Checks whether a plain XFN property exists for the given local name.
+     *
+     * @param localName local name to check.
+     * @return <code>true</code> if {@link #getPropertyByLocalName(String)}
+     *         would find a property, <code>false</code> otherwise.
+     */
+    public boolean isXFNLocalName(String localName) {
+        return peopleXFNProperties.containsKey(localName);
+    }
+
+    /**
+     * Checks whether an extended XFN property exists for the given local name.
+     *
+     * @param localName plain local name to check.
+     * @return <code>true</code> if {@link #getExtendedProperty(String)}
+     *         would find a property, <code>false</code> otherwise.
+     */
+    public boolean isExtendedXFNLocalName(String localName) {
+        // Consult the map that getExtendedProperty() reads from, so the two
+        // methods cannot drift apart if the maps ever get different key sets.
+        return hyperlinkXFNProperties.containsKey(localName);
+    }
+
+    /**
+     * Creates both the plain and the extended ("-hyperlink") property for the
+     * given local name in the {@link #NS} namespace and indexes them.
+     *
+     * @param localName local name of the plain property.
+     * @return the plain property URI.
+     */
+    private URI createProperty(String localName) {
+        if(hyperlinkXFNProperties == null) {
+            hyperlinkXFNProperties = new HashMap<String, URI>();
+        }
+        if(peopleXFNProperties == null) {
+            peopleXFNProperties = new HashMap<String, URI>();
+        }
+
+        final URI hyperlinkProperty = createProperty(NS, localName + "-hyperlink");
+        hyperlinkXFNProperties.put(localName, hyperlinkProperty);
+
+        final URI peopleProperty = createProperty(NS, localName);
+        peopleXFNProperties.put(localName, peopleProperty);
+        return peopleProperty;
+    }
+
+    /**
+     * Private constructor: instances are obtained via {@link #getInstance()}.
+     */
+    private XFN(){
+        super(NS);
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XHTML.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XHTML.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XHTML.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/XHTML.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Terms of the <a href="http://www.w3.org/1999/xhtml/vocab/">XHTML</a> vocabulary,
+ * exposed through a lazily created singleton.
+ */
+public class XHTML extends Vocabulary {
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://www.w3.org/1999/xhtml/vocab#";
+
+    /**
+     * Singleton holder, filled by the first {@link #getInstance()} call.
+     */
+    private static XHTML instance;
+
+    public final URI license    = createProperty(NS, "license"   );
+    public final URI meta       = createProperty(NS, "meta"      );
+    public final URI alternate  = createProperty(NS, "alternate" );
+    public final URI stylesheet = createProperty(NS, "stylesheet");
+
+    /**
+     * Not instantiable from outside, use {@link #getInstance()}.
+     */
+    private XHTML(){
+        super(NS);
+    }
+
+    /**
+     * @return the unique {@link XHTML} instance, created on first access.
+     */
+    public static XHTML getInstance() {
+        if(instance != null) {
+            return instance;
+        }
+        instance = new XHTML();
+        return instance;
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/package-info.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/package-info.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/package-info.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/vocab/package-info.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package defines the main ontologies used by <i>Any23</i>.
+ */
+package org.apache.any23.vocab;
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/FormatWriter.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/FormatWriter.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/FormatWriter.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/FormatWriter.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+/**
+ * Base interface used for the definition of <i>RDF format writers</i>.
+ * A format writer is a {@link TripleHandler} that additionally exposes an
+ * <i>annotation</i> flag.
+ */
+public interface FormatWriter extends TripleHandler {
+
+    /**
+     * If <code>true</code> then the produced <b>RDF</b> is annotated with
+     * the extractors used to generate the specific statements.
+     *
+     * @return the annotation flag value.
+     * @see #setAnnotated(boolean)
+     */
+     boolean isAnnotated();
+
+    /**
+     * Sets the <i>annotation</i> flag.
+     *
+     * @param f If <code>true</code> then the produced <b>RDF</b> is annotated with
+     *          the extractors used to generate the specific statements.
+     * @see #isAnnotated()
+     */
+     void setAnnotated(boolean f);
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandler.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandler.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandler.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandler.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.openrdf.model.Resource;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
+
+/**
+ * Defines a document based triple handler.
+ * <p>
+ * The callbacks are scoped by document ({@link #startDocument(URI)} /
+ * {@link #endDocument(URI)}) and by extraction context
+ * ({@link #openContext(ExtractionContext)} /
+ * {@link #closeContext(ExtractionContext)}); {@link #close()} is the final call.
+ */
+public interface TripleHandler {
+
+    /**
+     * Counterpart of {@link #endDocument(URI)}.
+     * NOTE(review): presumably invoked when the processing of a document
+     * begins - the exact call order is defined by the callers, confirm there.
+     *
+     * @param documentURI document URI.
+     * @throws TripleHandlerException
+     */
+    void startDocument(URI documentURI) throws TripleHandlerException;
+
+    /**
+     * Informs the handler that a new context has been established.
+     * Contexts are not guaranteed to receive any triples, so they
+     * might be closed without any triple.
+     *
+     * @param context the context that has been opened.
+     * @throws TripleHandlerException
+     */
+    void openContext(ExtractionContext context) throws TripleHandlerException;
+
+    /**
+     * Invoked with a currently open context,
+     * notifies the detection of a triple.
+     *
+     * @param s triple subject, cannot be <code>null</code>.
+     * @param p triple predicate, cannot be <code>null</code>.
+     * @param o triple object, cannot be <code>null</code>.
+     * @param g triple graph, can be <code>null</code>.
+     * @param context extraction context.
+     * @throws TripleHandlerException
+     */
+    void receiveTriple(Resource s, URI p, Value o, URI g, ExtractionContext context) throws TripleHandlerException;
+
+    /**
+     * Invoked with a currently open context, notifies the detection of a
+     * namespace.
+     *
+     * @param prefix namespace prefix.
+     * @param uri namespace <i>URI</i>.
+     * @param context namespace context.
+     * @throws TripleHandlerException
+     */
+    void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException;
+
+    /**
+     * Informs the handler that no more triples will come from a
+     * previously opened context. All contexts are guaranteed to
+     * be closed before the final close(). The document context
+     * for each document is guaranteed to be closed after all
+     * local contexts of that document.
+     *
+     * @param context the context to be closed.
+     * @throws TripleHandlerException
+     */
+    void closeContext(ExtractionContext context) throws TripleHandlerException;
+
+    /**
+     * Informs the handler that the end of the document
+     * has been reached.
+     *
+     * @param documentURI document URI.
+     * @throws TripleHandlerException
+     */
+    void endDocument(URI documentURI) throws TripleHandlerException;
+
+    /**
+     * Sets the length of the content to be processed.
+     * Unlike the other callbacks, this method declares no checked exception.
+     *
+     * @param contentLength length of the content to be processed
+     *        (the unit is not specified by this interface).
+     */
+    void setContentLength(long contentLength);
+
+    /**
+     * Will be called last and exactly once.
+     * @throws TripleHandlerException
+     */
+    void close() throws TripleHandlerException;
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandlerException.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandlerException.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandlerException.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/TripleHandlerException.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+/**
+ * Checked exception declared by the callbacks of the {@link TripleHandler}
+ * interface to report a failure while handling triples.
+ *
+ * @author Davide Palmisano (palmisano@fbk.eu)
+ */
+public class TripleHandlerException extends Exception {
+
+    /**
+     * Creates an exception with a detail message and no cause.
+     *
+     * @param s the detail message.
+     */
+    public TripleHandlerException(String s) {
+        super(s);
+    }
+
+
+    /**
+     * Creates an exception with a detail message and the exception that
+     * caused it.
+     *
+     * @param s the detail message.
+     * @param exception the cause, retrievable through {@link #getCause()}.
+     */
+    public TripleHandlerException(String s, Exception exception) {
+        super(s, exception);
+    }
+
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactory.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactory.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactory.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactory.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import java.io.OutputStream;
+
+import org.openrdf.rio.RDFFormat;
+
+/**
+ * Factory of {@link FormatWriter}s. Implementations are discovered and
+ * indexed by the {@link WriterFactoryRegistry}.
+ *
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+public interface WriterFactory {
+    /**
+     * @return the {@link RDFFormat} associated with this factory.
+     */
+    RDFFormat getRdfFormat();
+
+    /**
+     * @return the identifier of this factory. The {@link WriterFactoryRegistry}
+     *         rejects factories whose identifier is <code>null</code>, blank
+     *         or already registered.
+     */
+    String getIdentifier();
+
+    /**
+     * @return the MIME type of this factory. The {@link WriterFactoryRegistry}
+     *         rejects factories whose MIME type is <code>null</code> or blank.
+     */
+    String getMimeType();
+
+    /**
+     * Provides a {@link FormatWriter} for the given output stream.
+     *
+     * @param os the output stream handed to the writer.
+     * @return the {@link FormatWriter} for <code>os</code>.
+     */
+    FormatWriter getRdfWriter(OutputStream os);
+}

Added: incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java (added)
+++ incubator/any23/trunk/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.ServiceConfigurationError;
+import java.util.ServiceLoader;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Registry class for {@link WriterFactory}s.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class WriterFactoryRegistry {
+
+    private static final Logger LOG = LoggerFactory.getLogger(WriterFactoryRegistry.class);
+    
+    /**
+     * Singleton instance.
+     */
+    private static WriterFactoryRegistry instance;
+
+    /**
+     * List of registered writers.
+     */
+    private final List<WriterFactory> writers =
+            new ArrayList<WriterFactory>();
+
+    /**
+     * MIME Type to {@link FormatWriter} class.
+     */
+    private final Map<String,List<WriterFactory>> mimeToWriter =
+            new HashMap<String, List<WriterFactory>>();
+
+    /**
+     * Identifier to {@link FormatWriter} class.
+     */
+    private final Map<String,WriterFactory> idToWriter =
+            new HashMap<String, WriterFactory>();
+
+    private List<String> identifiers = new ArrayList<String>();
+
+    /**
+     * Reads the identifier specified for the given {@link FormatWriter}.
+     *
+     * @param writerClass writer class.
+     * @return identifier.
+     */
+    public static String getIdentifier(WriterFactory writerClass) {
+        return writerClass.getIdentifier();
+    }
+
+    /**
+     * Reads the <i>MIME Type</i> specified for the given {@link FormatWriter}.
+     *
+     * @param writerClass writer class.
+     * @return MIME type.
+     */
+    public static String getMimeType(WriterFactory writerClass) {
+        return writerClass.getMimeType();
+    }
+
+    /**
+     * @return the {@link WriterFactoryRegistry} singleton instance.
+     */
+    public synchronized static WriterFactoryRegistry getInstance() {
+        if(instance == null) {
+            instance = new WriterFactoryRegistry();
+        }
+        return instance;
+    }
+
+    public WriterFactoryRegistry() {
+        ServiceLoader<WriterFactory> serviceLoader = java.util.ServiceLoader.load(WriterFactory.class, this.getClass().getClassLoader());
+        
+        Iterator<WriterFactory> iterator = serviceLoader.iterator();
+        
+        // use while(true) loop so that we can isolate all service loader errors from .next and .hasNext to a single service
+        while(true)
+        {
+            try
+            {
+                if(!iterator.hasNext())
+                    break;
+                
+                WriterFactory factory = iterator.next();
+                
+                this.register(factory);
+            }
+            catch(ServiceConfigurationError error)
+            {
+                LOG.error("Found error loading a WriterFactory", error);
+            }
+        }
+    }
+
+    /**
+     * Registers a new {@link WriterFactory} to the registry.
+     *
+     * @param writerClass the class of the writer to be registered.
+     * @throws IllegalArgumentException if the id or the mimetype are null
+     *                                  or empty strings or if the identifier has been already defined.
+     */
+    public synchronized void register(WriterFactory writerClass) {
+        if(writerClass == null) throw new NullPointerException("writerClass cannot be null.");
+        final String id       = writerClass.getIdentifier();
+        final String mimeType = writerClass.getMimeType();
+        if(id == null || id.trim().length() == 0) {
+            throw new IllegalArgumentException("Invalid identifier returned by writer " + writerClass);
+        }
+        if(mimeType == null || mimeType.trim().length() == 0) {
+            throw new IllegalArgumentException("Invalid MIME type returned by writer " + writerClass);
+        }
+        if(idToWriter.containsKey(id))
+            throw new IllegalArgumentException("The writer identifier is already declared.");
+
+        writers.add(writerClass);
+        identifiers.add(writerClass.getIdentifier());
+        List<WriterFactory> writerClasses = mimeToWriter.get(mimeType);
+        if(writerClasses == null) {
+            writerClasses = new ArrayList<WriterFactory>();
+            mimeToWriter.put(mimeType, writerClasses);
+        }
+        writerClasses.add(writerClass);
+        idToWriter.put(id, writerClass);
+    }
+
+    /**
+     * Verifies if a {@link FormatWriter} with given <code>id</code> identifier has been registered.
+     *
+     * @param id identifier.
+     * @return <code>true</code> if the identifier has been registered, <code>false</code> otherwise.
+     */
+    public synchronized boolean hasIdentifier(String id) {
+        return idToWriter.containsKey(id);
+    }
+
+    /**
+     * @return the list of all the specified identifiers.
+     */
+    public synchronized List<String> getIdentifiers() {
+        return Collections.unmodifiableList(identifiers);
+    }
+
+    /**
+     * @return the list of MIME types covered by the registered {@link FormatWriter}s.
+     */
+    public synchronized Collection<String> getMimeTypes() {
+        return Collections.unmodifiableCollection(mimeToWriter.keySet());
+    }
+
+    /**
+     * @return the list of all the registered {@link FormatWriter}s.
+     */
+    public synchronized List<WriterFactory> getWriters() {
+        return Collections.unmodifiableList(writers);
+    }
+
+    /**
+     * Returns the {@link FormatWriter} identified by <code>id</code>.
+     *
+     * @param id the writer identifier.
+     * @return the class of the {@link FormatWriter} matching the <code>id</code>
+     *         or <code>null</code> if not found.s
+     */
+    public synchronized WriterFactory getWriterByIdentifier(String id) {
+        return idToWriter.get(id);
+    }
+
+    /**
+     * Returns all the writers matching the specified <code>mimeType</code>.
+     *
+     * @param mimeType a MIMEType.
+     * @return a list of matching writers or an empty list.
+     */
+    public synchronized Collection<WriterFactory> getWritersByMimeType(String mimeType) {
+        final List<WriterFactory> writerClasses = mimeToWriter.get(mimeType);
+        return writerClasses;
+    }
+
+    /**
+     * Returns an instance of {@link FormatWriter} ready to write on the given <code>os</code>
+     * {@link OutputStream}.
+     *
+     * @param id the identifier of the {@link FormatWriter} to crate an instance.
+     * @param os the output stream.
+     * @return the not <code>null</code> {@link FormatWriter} instance.
+     * @throws NullPointerException if the <code>id</code> doesn't match any registered writer.
+     */
+    public synchronized FormatWriter getWriterInstanceByIdentifier(String id, OutputStream os) {
+        final  WriterFactory writerClazz = getWriterByIdentifier(id);
+        if(writerClazz == null)
+            throw new NullPointerException(
+                String.format("Cannot find writer with id '%s' .", id)
+            );
+        return createWriter(writerClazz, os);
+    }
+
+    /**
+     * Crates a writer instance.
+     *
+     * @param clazz class to instantiate.
+     * @param os output stream to pass as constructor argument.
+     * @return created instance.
+     * @throws IllegalArgumentException if an error occurs during instantiation.
+     */
+    private FormatWriter createWriter(WriterFactory clazz, OutputStream os) {
+        try {
+            return clazz.getRdfWriter(os);
+        } catch (Exception e) {
+            throw new IllegalArgumentException("Error while initializing format writer " + clazz + " .", e);
+        }
+    }
+
+}

Modified: incubator/any23/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/pom.xml?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/pom.xml (original)
+++ incubator/any23/trunk/core/pom.xml Mon Aug 13 06:15:29 2012
@@ -32,6 +32,11 @@
   <dependencies>
     <dependency>
       <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-api</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
       <artifactId>apache-any23-test-resources</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java Mon Aug 13 06:15:29 2012
@@ -24,6 +24,7 @@ import org.apache.any23.extractor.Extrac
 import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.extractor.ExtractorGroup;
 import org.apache.any23.extractor.ExtractorRegistry;
+import org.apache.any23.extractor.ExtractorRegistryImpl;
 import org.apache.any23.extractor.SingleDocumentExtraction;
 import org.apache.any23.extractor.SingleDocumentExtractionReport;
 import org.apache.any23.http.AcceptHeaderBuilder;
@@ -105,7 +106,7 @@ public class Any23 {
         this.defaultUserAgent = configuration.getPropertyOrFail("any23.http.user.agent.default");
 
         this.factories = (extractorGroup == null)
-                ? ExtractorRegistry.getInstance().getExtractorGroup()
+                ? ExtractorRegistryImpl.getInstance().getExtractorGroup()
                 : extractorGroup;
         setCacheFactory(new MemCopyFactory());
     }
@@ -132,7 +133,7 @@ public class Any23 {
                         ?
                 null
                         :
-                ExtractorRegistry.getInstance().getExtractorGroup( Arrays.asList(extractorNames))
+                ExtractorRegistryImpl.getInstance().getExtractorGroup( Arrays.asList(extractorNames))
         );
     }
 

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java Mon Aug 13 06:15:29 2012
@@ -22,6 +22,7 @@ import com.beust.jcommander.Parameters;
 import org.apache.any23.extractor.ExampleInputOutput;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.Extractor;
+import org.apache.any23.extractor.ExtractorRegistryImpl;
 import org.apache.any23.extractor.Extractor.BlindExtractor;
 import org.apache.any23.extractor.Extractor.ContentExtractor;
 import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
@@ -58,21 +59,21 @@ public class ExtractorDocumentation impl
 
     public void run() throws Exception {
         if (showList) {
-            printExtractorList();
+            printExtractorList(ExtractorRegistryImpl.getInstance());
         } else if (showInput) {
             if (extractor.isEmpty()) {
                 throw new IllegalArgumentException("Required argument for -i: extractor name");
             }
 
-            printExampleInput(extractor.get(0));
+            printExampleInput(extractor.get(0), ExtractorRegistryImpl.getInstance());
         } else if (showOutput) {
             if (extractor.isEmpty()) {
                 throw new IllegalArgumentException("Required argument for -o: extractor name");
             }
 
-            printExampleOutput(extractor.get(0));
+            printExampleOutput(extractor.get(0), ExtractorRegistryImpl.getInstance());
         } else if (showAll) {
-            printReport();
+            printReport(ExtractorRegistryImpl.getInstance());
         }
     }
 
@@ -88,8 +89,8 @@ public class ExtractorDocumentation impl
     /**
      * Prints the list of all the available extractors.
      */
-    public void printExtractorList() {
-        for (ExtractorFactory factory : ExtractorRegistry.getInstance().getExtractorGroup()) {
+    public void printExtractorList(ExtractorRegistry registry) {
+        for (ExtractorFactory factory : registry.getExtractorGroup()) {
             System.out.println( String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorType()));
         }
     }
@@ -98,10 +99,11 @@ public class ExtractorDocumentation impl
      * Prints an example of input for the provided extractor.
      *
      * @param extractorName the name of the extractor
+     * @param registry the registry in which the extractor name is looked up
      * @throws IOException raised if no extractor is found with that name
      */
-    public void printExampleInput(String extractorName) throws IOException {
-        ExtractorFactory<?> factory = getFactory(extractorName);
+    public void printExampleInput(String extractorName, ExtractorRegistry registry) throws IOException {
+        ExtractorFactory<?> factory = getFactory(registry, extractorName);
         ExampleInputOutput example = new ExampleInputOutput(factory);
         String input = example.getExampleInput();
         if (input == null) {
@@ -114,11 +116,12 @@ public class ExtractorDocumentation impl
      * Prints an output example for the given extractor.
      *
      * @param extractorName the extractor name
+     * @param registry the registry in which the extractor name is looked up
      * @throws IOException raised if no extractor is found with that name
      * @throws ExtractionException
      */
-    public void printExampleOutput(String extractorName) throws IOException, ExtractionException {
-        ExtractorFactory<?> factory = getFactory(extractorName);
+    public void printExampleOutput(String extractorName, ExtractorRegistry registry) throws IOException, ExtractionException {
+        ExtractorFactory<?> factory = getFactory(registry, extractorName);
         ExampleInputOutput example = new ExampleInputOutput(factory);
         String output = example.getExampleOutput();
         if (output == null) {
@@ -133,9 +136,9 @@ public class ExtractorDocumentation impl
      * @throws IOException
      * @throws ExtractionException
      */
-    public void printReport() throws IOException, ExtractionException {
-        for (String extractorName : ExtractorRegistry.getInstance().getAllNames()) {
-            ExtractorFactory<?> factory = ExtractorRegistry.getInstance().getFactory(extractorName);
+    public void printReport(ExtractorRegistry registry) throws IOException, ExtractionException {
+        for (String extractorName : registry.getAllNames()) {
+            ExtractorFactory<?> factory = registry.getFactory(extractorName);
             ExampleInputOutput example = new ExampleInputOutput(factory);
             System.out.println("Extractor: " + extractorName);
             System.out.println("\ttype: " + getType(factory));
@@ -155,11 +158,11 @@ public class ExtractorDocumentation impl
         }
     }
 
-    private ExtractorFactory<?> getFactory(String name) {
-        if (!ExtractorRegistry.getInstance().isRegisteredName(name)) {
+    private ExtractorFactory<?> getFactory(ExtractorRegistry registry, String name) {
+        if (!registry.isRegisteredName(name)) {
             throw new IllegalArgumentException("Unknown extractor name: " + name);
         }
-        return ExtractorRegistry.getInstance().getFactory(name);
+        return registry.getFactory(name);
     }
 
     private String getType(ExtractorFactory<?> factory) {

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java Mon Aug 13 06:15:29 2012
@@ -27,7 +27,6 @@ import org.apache.any23.configuration.Co
 import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.extractor.ExtractionParameters.ValidationMode;
-import org.apache.any23.extractor.SingleDocumentExtraction;
 import org.apache.any23.filter.IgnoreAccidentalRDFa;
 import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
 import org.apache.any23.source.DocumentSource;
@@ -36,7 +35,7 @@ import org.apache.any23.writer.LoggingTr
 import org.apache.any23.writer.ReportingTripleHandler;
 import org.apache.any23.writer.TripleHandler;
 import org.apache.any23.writer.TripleHandlerException;
-import org.apache.any23.writer.WriterRegistry;
+import org.apache.any23.writer.WriterFactoryRegistry;
 import org.kohsuke.MetaInfServices;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -47,7 +46,6 @@ import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -65,7 +63,7 @@ import static java.lang.String.format;
 @Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.")
 public class Rover implements Tool {
 
-    private static final String[] FORMATS = WriterRegistry.getInstance().getIdentifiers();
+    private static final List<String> FORMATS = WriterFactoryRegistry.getInstance().getIdentifiers();
 
     private static final int DEFAULT_FORMAT_INDEX = 0;
 
@@ -85,7 +83,7 @@ public class Rover implements Tool {
     private List<String> extractors = new LinkedList<String>();
 
     @Parameter(names = { "-f", "--format" }, description = "the output format")
-    private String format = FORMATS[DEFAULT_FORMAT_INDEX];
+    private String format = FORMATS.get(DEFAULT_FORMAT_INDEX);
 
     @Parameter(
        names = { "-l", "--log" },
@@ -123,12 +121,12 @@ public class Rover implements Tool {
 
     protected void configure() {
         try {
-            tripleHandler = WriterRegistry.getInstance().getWriterInstanceByIdentifier(format, outputStream);
+            tripleHandler = WriterFactoryRegistry.getInstance().getWriterInstanceByIdentifier(format, outputStream);
         } catch (Exception e) {
             throw new NullPointerException(
                     format("Invalid output format '%s', admitted values: %s",
                         format,
-                        Arrays.toString(FORMATS)
+                        FORMATS
                     )
             );
         }
@@ -162,7 +160,7 @@ public class Rover implements Tool {
                         :
                 new ExtractionParameters(configuration, ValidationMode.None          , nestingDisabled);
         if (defaultns != null) {
-            extractionParameters.setProperty(SingleDocumentExtraction.EXTRACTION_CONTEXT_URI_PROPERTY,
+            extractionParameters.setProperty(ExtractionParameters.EXTRACTION_CONTEXT_URI_PROPERTY,
                                              defaultns);
         }
 

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExampleInputOutput.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExampleInputOutput.java?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExampleInputOutput.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExampleInputOutput.java Mon Aug 13 06:15:29 2012
@@ -40,7 +40,7 @@ public class ExampleInputOutput {
     private final ExtractorFactory<?> factory;
 
     public ExampleInputOutput(String extractorName) {
-        this(ExtractorRegistry.getInstance().getFactory(extractorName));
+        this(ExtractorRegistryImpl.getInstance().getFactory(extractorName));
     }
 
     public ExampleInputOutput(ExtractorFactory<?> factory) {

Added: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java (added)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor;
+
+import org.apache.any23.configuration.DefaultConfiguration;
+import org.apache.any23.extractor.csv.CSVExtractor;
+import org.apache.any23.extractor.html.AdrExtractor;
+import org.apache.any23.extractor.html.GeoExtractor;
+import org.apache.any23.extractor.html.HCalendarExtractor;
+import org.apache.any23.extractor.html.HCardExtractor;
+import org.apache.any23.extractor.html.HListingExtractor;
+import org.apache.any23.extractor.html.HRecipeExtractor;
+import org.apache.any23.extractor.html.HResumeExtractor;
+import org.apache.any23.extractor.html.HReviewExtractor;
+import org.apache.any23.extractor.html.HTMLMetaExtractor;
+import org.apache.any23.extractor.html.HeadLinkExtractor;
+import org.apache.any23.extractor.html.ICBMExtractor;
+import org.apache.any23.extractor.html.LicenseExtractor;
+import org.apache.any23.extractor.html.SpeciesExtractor;
+import org.apache.any23.extractor.html.TitleExtractor;
+import org.apache.any23.extractor.html.TurtleHTMLExtractor;
+import org.apache.any23.extractor.html.XFNExtractor;
+import org.apache.any23.extractor.microdata.MicrodataExtractor;
+import org.apache.any23.extractor.rdf.NQuadsExtractor;
+import org.apache.any23.extractor.rdf.NTriplesExtractor;
+import org.apache.any23.extractor.rdf.RDFXMLExtractor;
+import org.apache.any23.extractor.rdf.TriXExtractor;
+import org.apache.any23.extractor.rdf.TurtleExtractor;
+import org.apache.any23.extractor.rdfa.RDFa11Extractor;
+import org.apache.any23.extractor.rdfa.RDFaExtractor;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ *  Singleton class acting as a register for all the various
+ *  {@link Extractor}.
+ */
+public class ExtractorRegistryImpl implements ExtractorRegistry {
+
+    /**
+     * The instance.
+     */
+    private static ExtractorRegistry instance = null;
+
+    /**
+     * maps containing the related {@link ExtractorFactory} for each
+     * registered {@link Extractor}.
+     */
+    private Map<String, ExtractorFactory<?>> factories = new HashMap<String, ExtractorFactory<?>>();
+
+    /**
+     * @return returns the {@link ExtractorRegistry} instance.
+     */
+    public static ExtractorRegistry getInstance() {
+        // Thread-safe
+        synchronized (ExtractorRegistry.class) {
+            final DefaultConfiguration conf = DefaultConfiguration.singleton();
+            if (instance == null) {
+                instance = new ExtractorRegistryImpl();
+                // FIXME: Remove these hardcoded links to the extractor factories by turning them into SPI interfaces
+                instance.register(RDFXMLExtractor.factory);
+                instance.register(TurtleExtractor.factory);
+                instance.register(NTriplesExtractor.factory);
+                instance.register(NQuadsExtractor.factory);
+                instance.register(TriXExtractor.factory);
+                if(conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
+                    instance.register(RDFa11Extractor.factory);
+                } else {
+                    instance.register(RDFaExtractor.factory);
+                }
+                instance.register(HeadLinkExtractor.factory);
+                instance.register(LicenseExtractor.factory);
+                instance.register(TitleExtractor.factory);
+                instance.register(XFNExtractor.factory);
+                instance.register(ICBMExtractor.factory);
+                instance.register(AdrExtractor.factory);
+                instance.register(GeoExtractor.factory);
+                instance.register(HCalendarExtractor.factory);
+                instance.register(HCardExtractor.factory);
+                instance.register(HListingExtractor.factory);
+                instance.register(HResumeExtractor.factory);
+                instance.register(HReviewExtractor.factory);
+                instance.register(HRecipeExtractor.factory);
+                instance.register(SpeciesExtractor.factory);
+                instance.register(TurtleHTMLExtractor.factory);
+                instance.register(MicrodataExtractor.factory);
+                instance.register(CSVExtractor.factory);
+                if(conf.getFlagProperty("any23.extraction.head.meta")) {
+                    instance.register(HTMLMetaExtractor.factory);
+                }
+            }
+        }
+        return instance;
+    }
+
+    /**
+     * Registers an {@link ExtractorFactory}.
+     *
+     * @param factory
+     * @throws IllegalArgumentException if trying to register a {@link ExtractorFactory}
+     *         with a name that already exists in the registry.
+     */
+    public void register(ExtractorFactory<?> factory) {
+        if (factories.containsKey(factory.getExtractorName())) {
+            throw new IllegalArgumentException(String.format("Extractor name clash: %s",
+                    factory.getExtractorName()));
+        }
+        factories.put(factory.getExtractorName(), factory);
+    }
+
+    /**
+     *
+     * Retrieves a {@link ExtractorFactory} given its name
+     *
+     * @param name of the desired factory
+     * @return the {@link ExtractorFactory} associated to the provided name
+     * @throws IllegalArgumentException if there is not a
+     * {@link ExtractorFactory} associated to the provided name.
+     */
+    public ExtractorFactory<?> getFactory(String name) {
+        if (!factories.containsKey(name)) {
+            throw new IllegalArgumentException("Unregistered extractor name: " + name);
+        }
+        return factories.get(name);
+    }
+
+    /**
+     * @return an {@link ExtractorGroup} with all the registered
+     * {@link Extractor}.
+     */
+    public ExtractorGroup getExtractorGroup() {
+        return getExtractorGroup(getAllNames());
+    }
+
+    /**
+     * Returns an {@link ExtractorGroup} containing the
+     * {@link ExtractorFactory} matching the names provided as input.
+     * @param names a {@link java.util.List} containing the names of the desired {@link ExtractorFactory}.
+     * @return the extraction group.
+     */
+    public ExtractorGroup getExtractorGroup(List<String> names) {
+        List<ExtractorFactory<?>> members = new ArrayList<ExtractorFactory<?>>(names.size());
+        for (String name : names) {
+            members.add(getFactory(name));
+        }
+        return new ExtractorGroup(members);
+    }
+
+    /**
+     * 
+     * @param name of the {@link ExtractorFactory}
+     * @return <code>true</code> if there is a {@link ExtractorFactory}
+     * associated to the provided name.
+     */
+    public boolean isRegisteredName(String name) {
+        return factories.containsKey(name);
+    }
+
+    /**
+     * Returns the names of all registered extractors, sorted alphabetically.
+     */
+    public List<String> getAllNames() {
+        List<String> result = new ArrayList<String>(factories.keySet());
+        Collections.sort(result);
+        return result;
+    }
+
+}

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java Mon Aug 13 06:15:29 2012
@@ -72,12 +72,6 @@ import static org.apache.any23.extractor
  */
 public class SingleDocumentExtraction {
 
-    public static final String EXTRACTION_CONTEXT_URI_PROPERTY = "any23.extraction.context.uri";
-
-    public static final String METADATA_TIMESIZE_FLAG           = "any23.extraction.metadata.timesize";
-    public static final String METADATA_NESTING_FLAG            = "any23.extraction.metadata.nesting";
-    public static final String METADATA_DOMAIN_PER_ENTITY_FLAG  = "any23.extraction.metadata.domain.per.entity";
-
     private static final SINDICE vSINDICE = SINDICE.getInstance();
 
     private final static Logger log = LoggerFactory.getLogger(SingleDocumentExtraction.class);
@@ -214,7 +208,7 @@ public class SingleDocumentExtraction {
             extractionParameters = ExtractionParameters.newDefault(configuration);
         }
 
-        final String contextURI = extractionParameters.getProperty(EXTRACTION_CONTEXT_URI_PROPERTY);
+        final String contextURI = extractionParameters.getProperty(ExtractionParameters.EXTRACTION_CONTEXT_URI_PROPERTY);
         ensureHasLocalCopy();
         try {
             this.documentURI = new Any23ValueFactoryWrapper(
@@ -271,9 +265,9 @@ public class SingleDocumentExtraction {
         }
 
         // Resource consolidation.
-        final boolean addDomainTriples = extractionParameters.getFlag(METADATA_DOMAIN_PER_ENTITY_FLAG);
+        final boolean addDomainTriples = extractionParameters.getFlag(ExtractionParameters.METADATA_DOMAIN_PER_ENTITY_FLAG);
         final ExtractionContext consolidationContext;
-        if(extractionParameters.getFlag(METADATA_NESTING_FLAG)) {
+        if(extractionParameters.getFlag(ExtractionParameters.METADATA_NESTING_FLAG)) {
             // Consolidation with nesting.
             consolidationContext = consolidateResources(resourceRoots, propertyPaths, addDomainTriples, output);
         } else {
@@ -281,7 +275,7 @@ public class SingleDocumentExtraction {
         }
 
         // Adding time/size meta triples.
-        if (extractionParameters.getFlag(METADATA_TIMESIZE_FLAG)) {
+        if (extractionParameters.getFlag(ExtractionParameters.METADATA_TIMESIZE_FLAG)) {
             try {
                 addExtractionTimeSizeMetaTriples(consolidationContext);
             } catch (TripleHandlerException e) {

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriter.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriter.java?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriter.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriter.java Mon Aug 13 06:15:29 2012
@@ -33,7 +33,6 @@ import java.io.PrintStream;
  *
  * @author Michele Mostarda (mostarda@fbk.eu)
  */
-@Writer(identifier = "json", mimeType = "text/json" )
 public class JSONWriter implements FormatWriter {
 
     private final PrintStream ps;

Added: incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java?rev=1372269&view=auto
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java (added)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java Mon Aug 13 06:15:29 2012
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import java.io.OutputStream;
+
+import org.kohsuke.MetaInfServices;
+import org.openrdf.rio.RDFFormat;
+
+/**
+ * @author Peter Ansell p_ansell@yahoo.com
+ * 
+ */
+@MetaInfServices
+public class JSONWriterFactory implements WriterFactory {
+
+    public static final String MIME_TYPE = "text/json";
+    public static final String IDENTIFIER = "json";
+
+    /**
+     * 
+     */
+    public JSONWriterFactory() {
+    }
+
+    @Override
+    public RDFFormat getRdfFormat() {
+        throw new RuntimeException(
+                "TODO: Implement an RDFFormat for this RDF JSON serialisation format");
+    }
+
+    @Override
+    public String getIdentifier() {
+        return JSONWriterFactory.IDENTIFIER;
+    }
+
+    @Override
+    public String getMimeType() {
+        return JSONWriterFactory.MIME_TYPE;
+    }
+
+    @Override
+    public FormatWriter getRdfWriter(OutputStream os) {
+        return new JSONWriter(os);
+    }
+
+}

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NQuadsWriter.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NQuadsWriter.java?rev=1372269&r1=1372268&r2=1372269&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NQuadsWriter.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/NQuadsWriter.java Mon Aug 13 06:15:29 2012
@@ -24,7 +24,6 @@ import java.io.OutputStream;
  *
  * @author Michele Mostarda (mostarda@fbk.eu)
  */
-@Writer(identifier = "nquads", mimeType = "text/plain")
 public class NQuadsWriter extends RDFWriterTripleHandler implements FormatWriter {
 
     public NQuadsWriter(OutputStream os) {



Mime
View raw message