incubator-droids-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject svn commit: r1445240 - in /incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids: core/Task.java parse/FileNameParser.java parse/LinkedParserData.java parse/SimpleLinkParser.java
Date Tue, 12 Feb 2013 16:29:24 GMT
Author: tobr
Date: Tue Feb 12 16:29:24 2013
New Revision: 1445240

URL: http://svn.apache.org/r1445240
Log:
Added the new ParserData API to the Task interface.

Added:
    incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java
  (with props)
Modified:
    incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java
    incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
    incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java

Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java?rev=1445240&r1=1445239&r2=1445240&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java
(original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java
Tue Feb 12 16:29:24 2013
@@ -38,13 +38,20 @@ public interface Task extends Serializab
     public URI getURI();
 
     /**
-     * The data of the task.
+     * The raw data of the task.
      *
      * @return a Map of data values
      */
     public ContentEntity getContentEntity();
 
     /**
+     * The data extracted by the {@link Parser}
+     *
+     * @return the extracted data
+     */
+    public ParserData getParserData();
+
+    /**
      * @return The depth of the task
      */
     public int getDepth();
@@ -75,4 +82,5 @@ public interface Task extends Serializab
      * @return Task
      */
     public <T extends Task> T createTask(URI uri);
+
 }

Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java?rev=1445240&r1=1445239&r2=1445240&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
(original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
Tue Feb 12 16:29:24 2013
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.droids.parse;
 
 import org.apache.droids.core.DroidsException;
@@ -7,7 +23,7 @@ import org.apache.droids.core.Task;
 import java.io.IOException;
 
 /**
- * Simple Parser implmentation extracting the path component from
+ * Simple Parser implementation extracting the path component from
  * the URI of the task.
  * For file based walkers, this is file name of the file.
  *
@@ -21,6 +37,6 @@ public class FileNameParser implements P
     @Override
     public void parse(Task task) throws DroidsException, IOException {
         String path = task.getURI().getPath();
-        task.getContentEntity().put(FILENAME, path.substring(path.lastIndexOf('/') + 1));
+        task.getParserData().set(FILENAME, path.substring(path.lastIndexOf('/') + 1));
     }
 }

Added: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java?rev=1445240&view=auto
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java
(added)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java
Tue Feb 12 16:29:24 2013
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.parse;
+
+import org.apache.droids.core.ParserData;
+
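+/**
+ * {@link ParserData} extension with convenience accessors for the
+ * anchor text and anchor title of a link, stored under the keys
+ * {@link #ANCHOR_TEXT} and {@link #ANCHOR_TITLE}.
+ */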
+public class LinkedParserData extends ParserData {
+    public static final String ANCHOR_TEXT = "anchortext";
+    public static final String ANCHOR_TITLE = "anchortitle";
+
+    public LinkedParserData() {
+        super();
+    }
+
+    public void setAnchorText(String anchorText) {
+        this.set(ANCHOR_TEXT, anchorText);
+    }
+
+    public String getAnchorText() {
+        return this.get(ANCHOR_TEXT);
+    }
+
+    public void setAnchorTitle(String anchorTitle) {
+        this.set(ANCHOR_TITLE, anchorTitle);
+    }
+
+    public String getAnchorTitle() {
+        return this.get(ANCHOR_TITLE);
+    }
+
+}

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java?rev=1445240&r1=1445239&r2=1445240&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java
(original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java
Tue Feb 12 16:29:24 2013
@@ -1,8 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.droids.parse;
 
 import org.apache.droids.core.DroidsException;
+import org.apache.droids.core.LinkedTask;
 import org.apache.droids.core.Parser;
-import org.apache.droids.core.Task;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.spi.LocationAwareLogger;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -18,21 +37,25 @@ import java.util.regex.Pattern;
  *
  * @version 1.0
  */
-public class SimpleLinkParser<T extends Task> implements Parser<T> {
+public class SimpleLinkParser implements Parser<LinkedTask> {
+    Logger logger = LoggerFactory.getLogger(SimpleLinkParser.class);
 
     @Override
-    public void parse(T task) throws DroidsException, IOException {
+    public void parse(LinkedTask task) throws DroidsException, IOException {
+        logger.info("parse " + task.getURI());
         InputStream inStream = task.getContentEntity().getContent();
         if (inStream != null) {
             Scanner s = new Scanner(inStream).useDelimiter("\\A");
             String content = s.hasNext() ? s.next() : "";
             Pattern linkPattern = Pattern.compile("<a[^>]+href=[\"']?([^\"'>]+)[\"']?[^>]*>(.+?)</a>",
 Pattern.CASE_INSENSITIVE|Pattern.DOTALL);
             Matcher pageMatcher = linkPattern.matcher(content);
-            Set<Task> links = new HashSet<Task>();
+            Set<LinkedTask> links = new HashSet<LinkedTask>();
             while(pageMatcher.find()){
-                links.add(task.createTask(task.getURI().resolve(pageMatcher.group(1))));
+                LinkedTask newTask = task.createTask(task.getURI().resolve(pageMatcher.group(1)));
+                links.add(newTask);
             }
-            task.getContentEntity().setLinks(links);
+            task.setTo(links);
         }
+
     }
 }



Mime
View raw message