sling-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From npelt...@apache.org
Subject [sling-org-apache-sling-pipes] branch master updated: SLING-7629 introducing RegexpPipe
Date Sun, 29 Apr 2018 19:43:58 GMT
This is an automated email from the ASF dual-hosted git repository.

npeltier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-pipes.git


The following commit(s) were added to refs/heads/master by this push:
     new 1673b29  SLING-7629 introducing RegexpPipe
     new 2b686d6  Merge branch 'master' of github.com:apache/sling-org-apache-sling-pipes
1673b29 is described below

commit 1673b29be33cdf7ed2d39cec9882a8204ac6c984
Author: Nicolas Peltier <peltier.nicolas@gmail.com>
AuthorDate: Sun Apr 29 21:41:58 2018 +0200

    SLING-7629 introducing RegexpPipe
    
    - available through .egrep command,
    - can be configured with a "pattern" property; each match is output either as a string (full match),
      or as a map, in case named capturing groups have been used (see unit test for example)
---
 pom.xml                                            |   3 +-
 .../java/org/apache/sling/pipes/PipeBuilder.java   |   7 ++
 .../sling/pipes/internal/PipeBuilderImpl.java      |   8 ++
 .../apache/sling/pipes/internal/PlumberImpl.java   |   4 +
 .../pipes/internal/{ => inputstream}/CsvPipe.java  |   3 +-
 .../pipes/internal/{ => inputstream}/JsonPipe.java |   3 +-
 .../pipes/internal/inputstream/RegexpPipe.java     | 108 +++++++++++++++++++++
 .../java/org/apache/sling/pipes/package-info.java  |   2 +-
 .../internal/{ => inputstream}/CsvPipeTest.java    |   2 +-
 .../internal/{ => inputstream}/JsonPipeTest.java   |   2 +-
 .../pipes/internal/inputstream/RegexpPipeTest.java |  81 ++++++++++++++++
 src/test/resources/standardTest.html               |  27 ++++++
 12 files changed, 243 insertions(+), 7 deletions(-)

diff --git a/pom.xml b/pom.xml
index ded1611..ab5ba65 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,7 +29,7 @@
   </parent>
 
   <artifactId>org.apache.sling.pipes</artifactId>
-  <version>2.0.3-SNAPSHOT</version>
+  <version>2.1.0-SNAPSHOT</version>
 
   <name>Apache Sling Pipes</name>
   <description>bulk content changes tool</description>
@@ -67,6 +67,7 @@
           <excludes combine.children="append">
             <!-- test csv files can't have licenses embedded -->
             <exclude>src/test/resources/**/*.csv</exclude>
+            <exclude>src/test/resources/**/*.csv</exclude>
           </excludes>
         </configuration>
       </plugin>
diff --git a/src/main/java/org/apache/sling/pipes/PipeBuilder.java b/src/main/java/org/apache/sling/pipes/PipeBuilder.java
index dc97bb5..225a20a 100644
--- a/src/main/java/org/apache/sling/pipes/PipeBuilder.java
+++ b/src/main/java/org/apache/sling/pipes/PipeBuilder.java
@@ -107,6 +107,13 @@ public interface PipeBuilder {
     PipeBuilder json(String expr);
 
     /**
+     * attach a Regexp pipe to the current context
+     * @param expr text expr or URL or path in the resource tree
+     * @return updated instance of PipeBuilder
+     */
+    PipeBuilder egrep(String expr);
+
+    /**
      * Attach a path pipe to the current context
      * @param expr path to create
      * @return updated instance of PipeBuilder
diff --git a/src/main/java/org/apache/sling/pipes/internal/PipeBuilderImpl.java b/src/main/java/org/apache/sling/pipes/internal/PipeBuilderImpl.java
index be21ae0..3d6d10b 100644
--- a/src/main/java/org/apache/sling/pipes/internal/PipeBuilderImpl.java
+++ b/src/main/java/org/apache/sling/pipes/internal/PipeBuilderImpl.java
@@ -30,6 +30,9 @@ import org.apache.sling.pipes.Pipe;
 import org.apache.sling.pipes.PipeBuilder;
 import org.apache.sling.pipes.Plumber;
 import org.apache.sling.pipes.ReferencePipe;
+import org.apache.sling.pipes.internal.inputstream.CsvPipe;
+import org.apache.sling.pipes.internal.inputstream.JsonPipe;
+import org.apache.sling.pipes.internal.inputstream.RegexpPipe;
 import org.apache.sling.pipes.internal.slingquery.ChildrenPipe;
 import org.apache.sling.pipes.internal.slingquery.ClosestPipe;
 import org.apache.sling.pipes.internal.slingquery.FindPipe;
@@ -162,6 +165,11 @@ public class PipeBuilderImpl implements PipeBuilder {
     }
 
     @Override
+    public PipeBuilder egrep(String expr) {
+        return pipeWithExpr(RegexpPipe.RESOURCE_TYPE, expr);
+    }
+
+    @Override
     public PipeBuilder mkdir(String expr) {
         return pipeWithExpr(PathPipe.RESOURCE_TYPE, expr);
     }
diff --git a/src/main/java/org/apache/sling/pipes/internal/PlumberImpl.java b/src/main/java/org/apache/sling/pipes/internal/PlumberImpl.java
index 7ebce3b..dccca6c 100644
--- a/src/main/java/org/apache/sling/pipes/internal/PlumberImpl.java
+++ b/src/main/java/org/apache/sling/pipes/internal/PlumberImpl.java
@@ -43,6 +43,9 @@ import org.apache.sling.pipes.PipeBuilder;
 import org.apache.sling.pipes.Plumber;
 import org.apache.sling.pipes.PlumberMXBean;
 import org.apache.sling.pipes.ReferencePipe;
+import org.apache.sling.pipes.internal.inputstream.CsvPipe;
+import org.apache.sling.pipes.internal.inputstream.JsonPipe;
+import org.apache.sling.pipes.internal.inputstream.RegexpPipe;
 import org.apache.sling.pipes.internal.slingquery.ChildrenPipe;
 import org.apache.sling.pipes.internal.slingquery.ClosestPipe;
 import org.apache.sling.pipes.internal.slingquery.FindPipe;
@@ -154,6 +157,7 @@ public class PlumberImpl implements Plumber, JobConsumer, PlumberMXBean {
         registerPipe(SiblingsPipe.RESOURCE_TYPE, SiblingsPipe.class);
         registerPipe(ClosestPipe.RESOURCE_TYPE, ClosestPipe.class);
         registerPipe(FindPipe.RESOURCE_TYPE, FindPipe.class);
+        registerPipe(RegexpPipe.RESOURCE_TYPE, RegexpPipe.class);
         toggleJmxRegistration(this, PlumberMXBean.class.getName(), true);
         refreshMonitoredPipes();
     }
diff --git a/src/main/java/org/apache/sling/pipes/internal/CsvPipe.java b/src/main/java/org/apache/sling/pipes/internal/inputstream/CsvPipe.java
similarity index 98%
rename from src/main/java/org/apache/sling/pipes/internal/CsvPipe.java
rename to src/main/java/org/apache/sling/pipes/internal/inputstream/CsvPipe.java
index 7619921..9d31e7b 100644
--- a/src/main/java/org/apache/sling/pipes/internal/CsvPipe.java
+++ b/src/main/java/org/apache/sling/pipes/internal/inputstream/CsvPipe.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sling.pipes.internal;
+package org.apache.sling.pipes.internal.inputstream;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.sling.api.resource.Resource;
@@ -39,7 +39,6 @@ public class CsvPipe extends AbstractInputStreamPipe {
     private static Logger logger = LoggerFactory.getLogger(JsonPipe.class);
     public static final String RESOURCE_TYPE = RT_PREFIX + "csv";
 
-
     protected static final String PN_SEPARATOR = "separator";
 
     protected static final String DEFAULT_SEPARATOR = ",";
diff --git a/src/main/java/org/apache/sling/pipes/internal/JsonPipe.java b/src/main/java/org/apache/sling/pipes/internal/inputstream/JsonPipe.java
similarity index 98%
rename from src/main/java/org/apache/sling/pipes/internal/JsonPipe.java
rename to src/main/java/org/apache/sling/pipes/internal/inputstream/JsonPipe.java
index 15857cc..e7862fd 100644
--- a/src/main/java/org/apache/sling/pipes/internal/JsonPipe.java
+++ b/src/main/java/org/apache/sling/pipes/internal/inputstream/JsonPipe.java
@@ -14,13 +14,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sling.pipes.internal;
+package org.apache.sling.pipes.internal.inputstream;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.sling.api.resource.Resource;
 import org.apache.sling.pipes.AbstractInputStreamPipe;
 import org.apache.sling.pipes.Plumber;
+import org.apache.sling.pipes.internal.JsonUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
diff --git a/src/main/java/org/apache/sling/pipes/internal/inputstream/RegexpPipe.java b/src/main/java/org/apache/sling/pipes/internal/inputstream/RegexpPipe.java
new file mode 100644
index 0000000..330a99d
--- /dev/null
+++ b/src/main/java/org/apache/sling/pipes/internal/inputstream/RegexpPipe.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.pipes.internal.inputstream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.sling.api.resource.Resource;
+import org.apache.sling.pipes.AbstractInputStreamPipe;
+import org.apache.sling.pipes.Plumber;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Pipe outputting matches of a regexp from a plain text file
+ */
+public class RegexpPipe extends AbstractInputStreamPipe {
+    private static Logger logger = LoggerFactory.getLogger(RegexpPipe.class);
+
+    public static final String RESOURCE_TYPE = "slingPipes/egrep";
+    public static final String PN_PATTERN = "pattern";
+    private static final Pattern PATTERN_NAME = Pattern.compile("\\?<([\\w]+)>");
+    private static final short PATTERN_IDX_NAME = 1;
+
+    public RegexpPipe(Plumber plumber, Resource resource) throws Exception {
+        super(plumber, resource);
+    }
+
+    @Override
+    public Iterator<Resource> getOutput(InputStream inputStream) {
+        Iterator<Resource> output = EMPTY_ITERATOR;
+        try {
+            String patternString = properties.get(PN_PATTERN, String.class);
+            final Collection<String> names = getGroupNames(patternString);
+            if (names.size() == 0){
+                logger.debug("no name defined, will take the whole match");
+            }
+            Pattern pattern = Pattern.compile(patternString);
+            String text = IOUtils.toString(inputStream, StandardCharsets.UTF_8);
+            logger.trace("about to parse {}", text);
+            Matcher matcher = pattern.matcher(text);
+            if (matcher.find()) {
+                output = new Iterator<Resource>() {
+                    boolean hasNext = true;
+                    @Override
+                    public boolean hasNext() {
+                        return hasNext;
+                    }
+
+                    @Override
+                    public Resource next() {
+                        if (names.size() > 0){
+                            Map map = new HashMap();
+                            for (String name : names) {
+                                map.put(name, matcher.group(name));
+                            }
+                            binding = map;
+                        } else {
+                            //no group names defined, we take the whole match
+                            binding = matcher.group(0);
+                        }
+                        hasNext = matcher.find();
+                        return getInput();
+                    }
+                };
+            }
+        } catch (IOException e) {
+            logger.error("unable to open input stream", e);
+        }
+        return output;
+    }
+
+    /**
+     * @param pattern configured pattern
+     * @return list of group names identified in a given pattern
+     */
+    protected Collection<String> getGroupNames(String pattern){
+        Collection<String> names = new ArrayList<>();
+        Matcher nameMatcher = PATTERN_NAME.matcher(pattern);
+        while (nameMatcher.find()){
+            names.add(nameMatcher.group(PATTERN_IDX_NAME));
+        }
+        return names;
+    }
+}
diff --git a/src/main/java/org/apache/sling/pipes/package-info.java b/src/main/java/org/apache/sling/pipes/package-info.java
index 18f6230..8528793 100644
--- a/src/main/java/org/apache/sling/pipes/package-info.java
+++ b/src/main/java/org/apache/sling/pipes/package-info.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-@Version("2.0.0")
+@Version("2.1.0")
 package org.apache.sling.pipes;
 
 import org.osgi.annotation.versioning.Version;
diff --git a/src/test/java/org/apache/sling/pipes/internal/CsvPipeTest.java b/src/test/java/org/apache/sling/pipes/internal/inputstream/CsvPipeTest.java
similarity index 97%
rename from src/test/java/org/apache/sling/pipes/internal/CsvPipeTest.java
rename to src/test/java/org/apache/sling/pipes/internal/inputstream/CsvPipeTest.java
index 52f523a..39a189c 100644
--- a/src/test/java/org/apache/sling/pipes/internal/CsvPipeTest.java
+++ b/src/test/java/org/apache/sling/pipes/internal/inputstream/CsvPipeTest.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sling.pipes.internal;
+package org.apache.sling.pipes.internal.inputstream;
 
 import org.apache.commons.collections.IteratorUtils;
 import org.apache.sling.api.resource.Resource;
diff --git a/src/test/java/org/apache/sling/pipes/internal/JsonPipeTest.java b/src/test/java/org/apache/sling/pipes/internal/inputstream/JsonPipeTest.java
similarity index 98%
rename from src/test/java/org/apache/sling/pipes/internal/JsonPipeTest.java
rename to src/test/java/org/apache/sling/pipes/internal/inputstream/JsonPipeTest.java
index 702ad62..c5ad5b9 100644
--- a/src/test/java/org/apache/sling/pipes/internal/JsonPipeTest.java
+++ b/src/test/java/org/apache/sling/pipes/internal/inputstream/JsonPipeTest.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sling.pipes.internal;
+package org.apache.sling.pipes.internal.inputstream;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
diff --git a/src/test/java/org/apache/sling/pipes/internal/inputstream/RegexpPipeTest.java b/src/test/java/org/apache/sling/pipes/internal/inputstream/RegexpPipeTest.java
new file mode 100644
index 0000000..c21487c
--- /dev/null
+++ b/src/test/java/org/apache/sling/pipes/internal/inputstream/RegexpPipeTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.pipes.internal.inputstream;
+
+import org.apache.commons.collections.IteratorUtils;
+import org.apache.sling.api.resource.Resource;
+import org.apache.sling.pipes.AbstractPipeTest;
+import org.apache.sling.pipes.Pipe;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.*;
+
+public class RegexpPipeTest extends AbstractPipeTest {
+    @Test
+    public void getGroupNames() throws Exception {
+        RegexpPipe pipe = new RegexpPipe(plumber, context.resourceResolver().getResource("/content"));
+        Collection<String> names = pipe.getGroupNames("some (?<first>group) that uses (?<name>names)");
+        assertEquals("there should be 2 names", 2, names.size());
+        assertTrue("there should be first", names.contains("first"));
+        assertTrue("there should be name", names.contains("name"));
+    }
+
+    @Test
+    public void getOutputWithSimpleMatch() throws Exception {
+        String htmlPath = "/content/test/standardTest.html";
+        context.load().binaryFile("/standardTest.html", htmlPath);
+        Pipe pipe = plumber.newPipe(context.resourceResolver())
+                .echo("/content")
+                .egrep(htmlPath).name("location").with("pattern","http://www.apache[^\\s]+")
+                .write("urls","+[${location}]").build();
+        Iterator<Resource> output = pipe.getOutput();
+        output.next();
+        Resource result = context.resourceResolver().getResource("/content/urls");
+        assertNotNull("there should be a result property", result);
+        String[] aUrls = result.adaptTo(String[].class);
+        assertNotNull("result property should be a MV", aUrls);
+        List<String> urls = Arrays.asList(aUrls);
+        assertEquals("there should be 1 elements", 1, urls.size());
+        assertEquals("first should be http://www.apache.org/licenses/LICENSE-2.0", "http://www.apache.org/licenses/LICENSE-2.0", urls.get(0));
+    }
+
+    @Test
+    public void getOutputWithNames() throws Exception {
+        String htmlPath = "/content/test/standardTest.html";
+        context.load().binaryFile("/standardTest.html", htmlPath);
+        Pipe pipe = plumber.newPipe(context.resourceResolver())
+                .echo("/content")
+                .egrep(htmlPath).name("location").with("pattern","\"(?<domain>http://[^/]+)(?<uri>[^\"^\']+)\"")
+                .mkdir("${location.uri}")
+                .write("domain","${location.domain}").build();
+        Iterator<Resource> output = pipe.getOutput();
+        List<Resource> resources = IteratorUtils.toList(output);
+        List<String> paths = resources.stream().map( resource -> resource.getPath()).collect(Collectors.toList());
+        assertEquals("there should be 3 elements", 3, paths.size());
+        assertEquals("first should be /content/img/1.png", "/content/img/1.png", paths.get(0));
+        assertEquals("second should be /content/page.html", "/content/page.html", paths.get(1));
+        assertEquals("third should be /content/img/2.png", "/content/img/2.png", paths.get(2));
+        assertEquals("one created resource's domain should be somesite", "http://somesite.com",
+                context.resourceResolver().getResource("/content/img/1.png/domain").adaptTo(String.class));
+    }
+}
\ No newline at end of file
diff --git a/src/test/resources/standardTest.html b/src/test/resources/standardTest.html
new file mode 100644
index 0000000..e4141b1
--- /dev/null
+++ b/src/test/resources/standardTest.html
@@ -0,0 +1,27 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+<html>
+    <body>
+        <h1>some title</h1>
+        <p>
+            some paragraph and an image <img src="http://somesite.com/content/img/1.png">, and a <a href="http://somesite.com/content/page.html">link</a>
+        </p>
+        <img src="http://somesite.com/content/img/2.png">
+    </body>
+</html>
\ No newline at end of file

-- 
To stop receiving notification emails like this one, please contact
npeltier@apache.org.

Mime
View raw message