incubator-crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From m...@apache.org
Subject git commit: CRUNCH-83: Create a Maven Archetype.
Date Sun, 30 Sep 2012 19:43:33 GMT
Updated Branches:
  refs/heads/master 0083239b2 -> a79ce8a76


CRUNCH-83: Create a Maven Archetype.

Add a Maven Archetype as a new module.
Add slf4j-api and commons-io to Crunch compile dependencies.


Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/a79ce8a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/a79ce8a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/a79ce8a7

Branch: refs/heads/master
Commit: a79ce8a7674589b3c91e625f2a3f242f528edb8f
Parents: 0083239
Author: Matthias Friedrich <matt@mafr.de>
Authored: Sat Sep 29 10:47:11 2012 +0200
Committer: Matthias Friedrich <matt@mafr.de>
Committed: Sun Sep 30 21:37:50 2012 +0200

----------------------------------------------------------------------
 crunch-archetype/pom.xml                           |   71 +++++++++
 .../META-INF/maven/archetype-metadata.xml          |   43 ++++++
 .../src/main/resources/archetype-resources/pom.xml |  114 +++++++++++++++
 .../src/main/assembly/hadoop-job.xml               |   49 ++++++
 .../src/main/java/StopWordFilter.java              |   50 +++++++
 .../src/main/java/Tokenizer.java                   |   43 ++++++
 .../src/main/java/WordCount.java                   |   85 +++++++++++
 .../src/test/java/StopWordFilterTest.java          |   41 +++++
 .../src/test/java/TokenizerTest.java               |   49 ++++++
 .../resources/projects/basic/archetype.properties  |   20 +++
 .../src/test/resources/projects/basic/goal.txt     |    1 +
 crunch/pom.xml                                     |   16 ++
 pom.xml                                            |    8 +
 13 files changed, 590 insertions(+), 0 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-archetype/pom.xml b/crunch-archetype/pom.xml
new file mode 100644
index 0000000..985141a
--- /dev/null
+++ b/crunch-archetype/pom.xml
@@ -0,0 +1,71 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.crunch</groupId>
+    <artifactId>crunch-parent</artifactId>
+    <version>0.4.0-incubating-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>crunch-archetype</artifactId>
+  <packaging>maven-archetype</packaging>
+
+  <name>Apache Crunch Archetype</name>
+  <description>
+    Create a basic, self-contained job for Apache Crunch.
+  </description>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-resources-plugin</artifactId>
+        <configuration>
+          <escapeString>\</escapeString> <!-- backslash-prefixed placeholders in filtered files are not expanded -->
+        </configuration>
+      </plugin>
+    </plugins>
+    <extensions>
+      <extension>
+        <groupId>org.apache.maven.archetype</groupId>
+        <artifactId>archetype-packaging</artifactId>
+        <version>2.2</version>
+      </extension>
+    </extensions>
+    <resources>
+      <resource> <!-- pom.xml files under src/main/resources are filtered at build time -->
+        <directory>src/main/resources</directory>
+        <filtering>true</filtering>
+        <includes>
+          <include>**/pom.xml</include>
+        </includes>
+      </resource>
+      <resource> <!-- every other resource is copied without filtering -->
+        <directory>src/main/resources</directory>
+        <filtering>false</filtering>
+        <excludes>
+          <exclude>**/pom.xml</exclude>
+        </excludes>
+      </resource>
+    </resources>
+  </build>
+
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/META-INF/maven/archetype-metadata.xml b/crunch-archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
new file mode 100644
index 0000000..d785cc5
--- /dev/null
+++ b/crunch-archetype/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<archetype-descriptor xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd" name="crunch-demo"
+    xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <fileSets>
+    <fileSet filtered="true" packaged="true" encoding="UTF-8"> <!-- main sources: templated, placed under the chosen package -->
+      <directory>src/main/java</directory>
+      <includes>
+        <include>**/*.java</include>
+      </includes>
+    </fileSet>
+    <fileSet filtered="true" packaged="true" encoding="UTF-8"> <!-- test sources: templated, placed under the chosen package -->
+      <directory>src/test/java</directory>
+      <includes>
+        <include>**/*.java</include>
+      </includes>
+    </fileSet>
+    <fileSet filtered="true" encoding="UTF-8"> <!-- assembly descriptors: templated, not packaged -->
+      <directory>src/main/assembly</directory>
+      <includes>
+        <include>**/*.xml</include>
+      </includes>
+    </fileSet>
+  </fileSets>
+</archetype-descriptor>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/pom.xml b/crunch-archetype/src/main/resources/archetype-resources/pom.xml
new file mode 100644
index 0000000..4c5d8cf
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/pom.xml
@@ -0,0 +1,114 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+<?xml version="1.0" encoding="UTF-8"?>
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>\${groupId}</groupId>
+  <artifactId>\${artifactId}</artifactId>
+  <version>\${version}</version>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <!-- Some care is needed when adding new dependencies with Hadoop.
+         There are lots of libraries on Hadoop's runtime classpath that
+         the client or core POMs don't tell you about. When adding a new
+         dependency, first check Hadoop's lib directory if it's already
+         there. If it is, use the exact same version and set the scope
+         to "provided".
+
+         If there are dependency conflicts you cannot resolve, try setting
+         the HADOOP_USER_CLASSPATH_FIRST environment variable to "true",
+         giving your libraries precedence. -->
+    <dependency>
+      <groupId>org.apache.crunch</groupId>
+      <artifactId>crunch</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <!-- We use hadoop-core instead of hadoop-client because it contains
+         dependencies that are necessary for running LocalJobRunner from
+         an IDE. -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>${hadoop.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <version>${mockito.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>2.5.1</version>
+        <configuration>
+          <source>1.6</source>
+          <target>1.6</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.3</version>
+        <configuration>
+          <descriptors>
+            <descriptor>src/main/assembly/hadoop-job.xml</descriptor>
+          </descriptors>
+          <archive>
+            <manifest>
+              <mainClass>${package}.WordCount</mainClass>
+            </manifest>
+          </archive>
+        </configuration>
+        <executions>
+          <execution>
+            <id>make-assembly</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <prerequisites>
+    <maven>2.2.1</maven>
+  </prerequisites>
+
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/archetype-resources/src/main/assembly/hadoop-job.xml
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/src/main/assembly/hadoop-job.xml b/crunch-archetype/src/main/resources/archetype-resources/src/main/assembly/hadoop-job.xml
new file mode 100644
index 0000000..13dfee1
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/src/main/assembly/hadoop-job.xml
@@ -0,0 +1,49 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+#set( $symbol_pound = '#' )
+#set( $symbol_dollar = '$' )
+#set( $symbol_escape = '\' )
+<?xml version="1.0" encoding="UTF-8"?>
+
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+
+  <id>job</id>
+  <formats>
+    <format>jar</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+  <dependencySets>
+    <dependencySet> <!-- runtime dependencies, kept as jars under lib/, minus the project itself -->
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+      <outputDirectory>lib</outputDirectory>
+      <excludes>
+        <exclude>${symbol_dollar}{groupId}:${symbol_dollar}{artifactId}</exclude>
+      </excludes>
+    </dependencySet>
+    <dependencySet> <!-- the project's own artifact, unpacked into the archive -->
+      <unpack>true</unpack>
+      <includes>
+        <include>${symbol_dollar}{groupId}:${symbol_dollar}{artifactId}</include>
+      </includes>
+    </dependencySet>
+  </dependencySets>
+</assembly>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/archetype-resources/src/main/java/StopWordFilter.java
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/src/main/java/StopWordFilter.java b/crunch-archetype/src/main/resources/archetype-resources/src/main/java/StopWordFilter.java
new file mode 100644
index 0000000..ce03c2a
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/src/main/java/StopWordFilter.java
@@ -0,0 +1,50 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+#set( $symbol_pound = '#' )
+#set( $symbol_dollar = '$' )
+#set( $symbol_escape = '\' )
+package ${package};
+
+import java.util.Set;
+
+import org.apache.crunch.FilterFn;
+
+import com.google.common.collect.ImmutableSet;
+
+
+/**
+ * A filter that drops known English stop words; matching is exact and case-sensitive.
+ */
+public class StopWordFilter extends FilterFn<String> {
+
+    // English stop words, borrowed from Lucene.
+    private static final Set<String> STOP_WORDS = ImmutableSet.copyOf(new String[] {
+        "a", "and", "are", "as", "at", "be", "but", "by",
+        "for", "if", "in", "into", "is", "it",
+        "no", "not", "of", "on", "or", "s", "such",
+        "t", "that", "the", "their", "then", "there", "these",
+        "they", "this", "to", "was", "will", "with"
+    });
+
+    @Override
+    public boolean accept(String word) {
+        return !STOP_WORDS.contains(word);  // keep the word only when it is not a stop word
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/archetype-resources/src/main/java/Tokenizer.java
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/src/main/java/Tokenizer.java b/crunch-archetype/src/main/resources/archetype-resources/src/main/java/Tokenizer.java
new file mode 100644
index 0000000..728d441
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/src/main/java/Tokenizer.java
@@ -0,0 +1,43 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+#set( $symbol_pound = '#' )
+#set( $symbol_dollar = '$' )
+#set( $symbol_escape = '\' )
+package ${package};
+
+
+import org.apache.crunch.DoFn;
+import org.apache.crunch.Emitter;
+
+import com.google.common.base.Splitter;
+
+
+/**
+ * Splits a line of text into words on runs of whitespace, skipping empty tokens; stop words are removed separately by StopWordFilter.
+ */
+public class Tokenizer extends DoFn<String, String> {
+    private static final Splitter SPLITTER = Splitter.onPattern("${symbol_escape}${symbol_escape}s+").omitEmptyStrings();
+
+    @Override
+    public void process(String line, Emitter<String> emitter) {
+        for (String word : SPLITTER.split(line)) {
+            emitter.emit(word);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/archetype-resources/src/main/java/WordCount.java
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/src/main/java/WordCount.java b/crunch-archetype/src/main/resources/archetype-resources/src/main/java/WordCount.java
new file mode 100644
index 0000000..41c5333
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/src/main/java/WordCount.java
@@ -0,0 +1,85 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+#set( $symbol_pound = '#' )
+#set( $symbol_dollar = '$' )
+#set( $symbol_escape = '\' )
+package ${package};
+
+import org.apache.crunch.PCollection;
+import org.apache.crunch.PTable;
+import org.apache.crunch.Pipeline;
+import org.apache.crunch.PipelineResult;
+import org.apache.crunch.impl.mr.MRPipeline;
+import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+
+/**
+ * A word count example for Apache Crunch, based on Crunch's example projects.
+ */
+public class WordCount extends Configured implements Tool {
+    // Entry point: the status returned by run() becomes the process exit code.
+    public static void main(String[] args) throws Exception {
+        System.exit(ToolRunner.run(new Configuration(), new WordCount(), args));
+    }
+
+    public int run(String[] args) throws Exception {
+        // Returns 0 when the pipeline succeeds, 1 on bad usage or pipeline failure.
+        if (args.length != 2) {
+            System.err.println("Usage: hadoop jar ${artifactId}-${version}-job.jar"
+                                      + " [generic options] input output");
+            System.err.println();
+            GenericOptionsParser.printGenericCommandUsage(System.err);
+            return 1;
+        }
+
+        String inputPath = args[0];
+        String outputPath = args[1];
+
+        // Create an object to coordinate pipeline creation and execution.
+        Pipeline pipeline = new MRPipeline(WordCount.class, getConf());
+
+        // Reference a given text file as a collection of Strings.
+        PCollection<String> lines = pipeline.readTextFile(inputPath);
+
+        // Define a function that splits each line in a PCollection of Strings into
+        // a PCollection made up of the individual words in the file.
+        // The second argument sets the serialization format.
+        PCollection<String> words = lines.parallelDo(new Tokenizer(), Writables.strings());
+
+        // Take the collection of words and remove known stop words.
+        PCollection<String> noStopWords = words.filter(new StopWordFilter());
+
+        // The count method applies a series of Crunch primitives and returns
+        // a map of the unique words in the input PCollection to their counts.
+        PTable<String, Long> counts = noStopWords.count();
+
+        // Instruct the pipeline to write the resulting counts to a text file.
+        pipeline.writeTextFile(counts, outputPath);
+
+        // Execute the pipeline as a MapReduce.
+        PipelineResult result = pipeline.done();
+
+        return result.succeeded() ? 0 : 1;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/archetype-resources/src/test/java/StopWordFilterTest.java
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/src/test/java/StopWordFilterTest.java b/crunch-archetype/src/main/resources/archetype-resources/src/test/java/StopWordFilterTest.java
new file mode 100644
index 0000000..820ec97
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/src/test/java/StopWordFilterTest.java
@@ -0,0 +1,41 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+#set( $symbol_pound = '#' )
+#set( $symbol_dollar = '$' )
+#set( $symbol_escape = '\' )
+package ${package};
+
+import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.is;
+import org.apache.crunch.FilterFn;
+import org.junit.Test;
+
+
+public class StopWordFilterTest {
+
+  @Test
+  public void testFilter() {
+    FilterFn<String> stopWordFilter = new StopWordFilter();
+    // An ordinary word is accepted; words on the stop list are rejected.
+    assertThat(stopWordFilter.accept("foo"), is(true));
+    assertThat(stopWordFilter.accept("the"), is(false));
+    assertThat(stopWordFilter.accept("a"), is(false));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/main/resources/archetype-resources/src/test/java/TokenizerTest.java
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/main/resources/archetype-resources/src/test/java/TokenizerTest.java b/crunch-archetype/src/main/resources/archetype-resources/src/test/java/TokenizerTest.java
new file mode 100644
index 0000000..7691e54
--- /dev/null
+++ b/crunch-archetype/src/main/resources/archetype-resources/src/test/java/TokenizerTest.java
@@ -0,0 +1,49 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##   http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing,
+## software distributed under the License is distributed on an
+## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+## KIND, either express or implied.  See the License for the
+## specific language governing permissions and limitations
+## under the License.
+##
+#set( $symbol_pound = '#' )
+#set( $symbol_dollar = '$' )
+#set( $symbol_escape = '\' )
+package ${package};
+
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+
+import org.apache.crunch.Emitter;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+
+@RunWith(MockitoJUnitRunner.class)
+public class TokenizerTest {
+  @Mock
+  private Emitter<String> emitter;
+
+  @Test
+  public void testProcess() {
+    // Leading, trailing and repeated blanks must not produce empty tokens.
+    new Tokenizer().process("  foo  bar ", emitter);
+
+    verify(emitter).emit("foo");
+    verify(emitter).emit("bar");
+    verifyNoMoreInteractions(emitter);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/test/resources/projects/basic/archetype.properties
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/test/resources/projects/basic/archetype.properties b/crunch-archetype/src/test/resources/projects/basic/archetype.properties
new file mode 100644
index 0000000..e4699b0
--- /dev/null
+++ b/crunch-archetype/src/test/resources/projects/basic/archetype.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package=it.pkg
+version=0.1-SNAPSHOT
+groupId=archetype.it
+artifactId=basic

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch-archetype/src/test/resources/projects/basic/goal.txt
----------------------------------------------------------------------
diff --git a/crunch-archetype/src/test/resources/projects/basic/goal.txt b/crunch-archetype/src/test/resources/projects/basic/goal.txt
new file mode 100644
index 0000000..3b85e6a
--- /dev/null
+++ b/crunch-archetype/src/test/resources/projects/basic/goal.txt
@@ -0,0 +1 @@
+clean package

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/crunch/pom.xml
----------------------------------------------------------------------
diff --git a/crunch/pom.xml b/crunch/pom.xml
index 912cd90..4dc34f1 100644
--- a/crunch/pom.xml
+++ b/crunch/pom.xml
@@ -50,6 +50,22 @@ under the License.
       <artifactId>avro-mapred</artifactId>
     </dependency>
 
+    <!-- Override the slf4j dependency from Avro, which is incompatible with
+         Hadoop's. We don't use the "provided" scope so that clients get the
+         correct version, too. -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <!-- LocalJobRunner needs commons-io, but hadoop-core doesn't reference
+         it. We add it here so that clients can run Crunch-based applications
+         from the IDE. Note that this wouldn't work with "provided" scope! -->
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+
     <dependency>
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a79ce8a7/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index d3bf8c6..c8e5e94 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,7 @@ under the License.
     <module>crunch-hbase</module>
     <module>crunch-test</module>
     <module>crunch-examples</module>
+    <module>crunch-archetype</module>
     <module>crunch-scrunch</module>
   </modules>
 
@@ -591,6 +592,7 @@ under the License.
             <exclude>.gitignore</exclude>
             <exclude>.repository/</exclude>
             <exclude>**/resources/*.txt</exclude>
+            <exclude>**/goal.txt</exclude>
             <exclude>**/target/generated-test-sources/**</exclude>
             <exclude>**/scripts/scrunch</exclude>
             </excludes>
@@ -651,6 +653,12 @@ under the License.
             </execution>
           </executions>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-archetype-plugin</artifactId>
+          <version>2.2</version>
+          <extensions>true</extensions>
+        </plugin>
         <!-- The m2eclipse plugin doesn't support all maven plugins we use, so
              we tell it to ignore them to avoid errors in Eclipse. -->
         <plugin>


Mime
View raw message