From droids-commits-return-531-apmail-incubator-droids-commits-archive=incubator.apache.org@incubator.apache.org Tue Feb 12 16:29:48 2013 Return-Path: X-Original-To: apmail-incubator-droids-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-droids-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 8D072E276 for ; Tue, 12 Feb 2013 16:29:48 +0000 (UTC) Received: (qmail 87438 invoked by uid 500); 12 Feb 2013 16:29:48 -0000 Delivered-To: apmail-incubator-droids-commits-archive@incubator.apache.org Received: (qmail 87412 invoked by uid 500); 12 Feb 2013 16:29:48 -0000 Mailing-List: contact droids-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: droids-dev@incubator.apache.org Delivered-To: mailing list droids-commits@incubator.apache.org Received: (qmail 87404 invoked by uid 99); 12 Feb 2013 16:29:48 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 12 Feb 2013 16:29:48 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 12 Feb 2013 16:29:45 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 130C723889D5; Tue, 12 Feb 2013 16:29:25 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1445240 - in /incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids: core/Task.java parse/FileNameParser.java parse/LinkedParserData.java parse/SimpleLinkParser.java Date: Tue, 12 Feb 2013 16:29:24 -0000 To: droids-commits@incubator.apache.org From: tobr@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: 
<20130212162925.130C723889D5@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tobr Date: Tue Feb 12 16:29:24 2013 New Revision: 1445240 URL: http://svn.apache.org/r1445240 Log: added new ParserData API to the task Added: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java (with props) Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java?rev=1445240&r1=1445239&r2=1445240&view=diff ============================================================================== --- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java (original) +++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Task.java Tue Feb 12 16:29:24 2013 @@ -38,13 +38,20 @@ public interface Task extends Serializab public URI getURI(); /** - * The data of the task. + * The raw data of the task. 
* * @return a Map of data values */ public ContentEntity getContentEntity(); /** + * The data extracted by the {@link Parser} + * + * @return the extracted data + */ + public ParserData getParserData(); + + /** * @return The depth of the task */ public int getDepth(); @@ -75,4 +82,5 @@ public interface Task extends Serializab * @return Task */ public T createTask(URI uri); + } Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java?rev=1445240&r1=1445239&r2=1445240&view=diff ============================================================================== --- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java (original) +++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java Tue Feb 12 16:29:24 2013 @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.droids.parse; import org.apache.droids.core.DroidsException; @@ -7,7 +23,7 @@ import org.apache.droids.core.Task; import java.io.IOException; /** - * Simple Parser implmentation extracting the path component from + * Simple Parser implementation extracting the path component from * the URI of the task. * For file based walkers, this is file name of the file. * @@ -21,6 +37,6 @@ public class FileNameParser implements P @Override public void parse(Task task) throws DroidsException, IOException { String path = task.getURI().getPath(); - task.getContentEntity().put(FILENAME, path.substring(path.lastIndexOf('/') + 1)); + task.getParserData().set(FILENAME, path.substring(path.lastIndexOf('/') + 1)); } } Added: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java?rev=1445240&view=auto ============================================================================== --- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java (added) +++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java Tue Feb 12 16:29:24 2013 @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.droids.parse; + +import org.apache.droids.core.ParserData; + +/** + * + * + * + */ +public class LinkedParserData extends ParserData { + public static final String ANCHOR_TEXT = "anchortext"; + public static final String ANCHOR_TITLE = "anchortitle"; + + public LinkedParserData() { + super(); + } + + public void setAnchorText(String anchorText) { + this.set(ANCHOR_TEXT, anchorText); + } + + public String getAnchorText() { + return this.get(ANCHOR_TEXT); + } + + public void setAnchorTitle(String anchorTitle) { + this.set(ANCHOR_TITLE, anchorTitle); + } + + public String getAnchorTitle() { + return this.get(ANCHOR_TITLE); + } + +} Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java ------------------------------------------------------------------------------ svn:keywords = Author Date Id Revision Propchange: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/LinkedParserData.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java URL: 
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java?rev=1445240&r1=1445239&r2=1445240&view=diff ============================================================================== --- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java (original) +++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java Tue Feb 12 16:29:24 2013 @@ -1,8 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.droids.parse; import org.apache.droids.core.DroidsException; +import org.apache.droids.core.LinkedTask; import org.apache.droids.core.Parser; -import org.apache.droids.core.Task; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.spi.LocationAwareLogger; import java.io.IOException; import java.io.InputStream; @@ -18,21 +37,25 @@ import java.util.regex.Pattern; * * @version 1.0 */ -public class SimpleLinkParser implements Parser { +public class SimpleLinkParser implements Parser { + Logger logger = LoggerFactory.getLogger(SimpleLinkParser.class); @Override - public void parse(T task) throws DroidsException, IOException { + public void parse(LinkedTask task) throws DroidsException, IOException { + logger.info("parse " + task.getURI()); InputStream inStream = task.getContentEntity().getContent(); if (inStream != null) { Scanner s = new Scanner(inStream).useDelimiter("\\A"); String content = s.hasNext() ? s.next() : ""; Pattern linkPattern = Pattern.compile("<a[^>]+href=[\"']?([^\"'>]+)[\"']?[^>]*>(.+?)</a>", Pattern.CASE_INSENSITIVE|Pattern.DOTALL); Matcher pageMatcher = linkPattern.matcher(content); - Set links = new HashSet(); + Set links = new HashSet(); while(pageMatcher.find()){ - links.add(task.createTask(task.getURI().resolve(pageMatcher.group(1)))); + LinkedTask newTask = task.createTask(task.getURI().resolve(pageMatcher.group(1))); + links.add(newTask); } - task.getContentEntity().setLinks(links); + task.setTo(links); } + } }