incubator-crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jwi...@apache.org
Subject [1/28] git commit: Rename com.cloudera.crunch -> org.apache.crunch in the Java core
Date Sat, 07 Jul 2012 21:49:06 GMT
Updated Branches:
  refs/heads/master 9cf194578 -> 2ff7247c0


Rename com.cloudera.crunch -> org.apache.crunch in the Java core

Signed-off-by: Josh Wills <jwills@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/5accc9ac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/5accc9ac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/5accc9ac

Branch: refs/heads/master
Commit: 5accc9ac142740937e3114fb33b9a18d3beb1f43
Parents: 9cf1945
Author: Josh Wills <jwills@cloudera.com>
Authored: Sat Jul 7 10:52:20 2012 -0700
Committer: Josh Wills <jwills@cloudera.com>
Committed: Sat Jul 7 14:21:30 2012 -0700

----------------------------------------------------------------------
 src/main/java/com/cloudera/crunch/CombineFn.java   |  805 -----------
 src/main/java/com/cloudera/crunch/DoFn.java        |  182 ---
 src/main/java/com/cloudera/crunch/Emitter.java     |   35 -
 src/main/java/com/cloudera/crunch/FilterFn.java    |  132 --
 .../java/com/cloudera/crunch/GroupingOptions.java  |  128 --
 src/main/java/com/cloudera/crunch/MapFn.java       |   39 -
 src/main/java/com/cloudera/crunch/PCollection.java |  181 ---
 .../java/com/cloudera/crunch/PGroupedTable.java    |   37 -
 src/main/java/com/cloudera/crunch/PTable.java      |  131 --
 src/main/java/com/cloudera/crunch/Pair.java        |  103 --
 src/main/java/com/cloudera/crunch/Pipeline.java    |  103 --
 .../java/com/cloudera/crunch/PipelineResult.java   |   72 -
 src/main/java/com/cloudera/crunch/Source.java      |   48 -
 .../java/com/cloudera/crunch/SourceTarget.java     |   24 -
 src/main/java/com/cloudera/crunch/TableSource.java |   25 -
 src/main/java/com/cloudera/crunch/Target.java      |   28 -
 src/main/java/com/cloudera/crunch/Tuple.java       |   34 -
 src/main/java/com/cloudera/crunch/Tuple3.java      |   94 --
 src/main/java/com/cloudera/crunch/Tuple4.java      |  104 --
 src/main/java/com/cloudera/crunch/TupleN.java      |   67 -
 .../com/cloudera/crunch/fn/CompositeMapFn.java     |   68 -
 .../java/com/cloudera/crunch/fn/ExtractKeyFn.java  |   41 -
 .../java/com/cloudera/crunch/fn/IdentityFn.java    |   36 -
 .../java/com/cloudera/crunch/fn/MapKeysFn.java     |   29 -
 .../java/com/cloudera/crunch/fn/MapValuesFn.java   |   29 -
 .../java/com/cloudera/crunch/fn/PairMapFn.java     |   61 -
 .../com/cloudera/crunch/impl/mem/MemPipeline.java  |  206 ---
 .../crunch/impl/mem/collect/MemCollection.java     |  202 ---
 .../crunch/impl/mem/collect/MemGroupedTable.java   |  123 --
 .../cloudera/crunch/impl/mem/collect/MemTable.java |  143 --
 .../com/cloudera/crunch/impl/mr/MRPipeline.java    |  320 -----
 .../crunch/impl/mr/collect/DoCollectionImpl.java   |   62 -
 .../crunch/impl/mr/collect/DoTableImpl.java        |   76 -
 .../crunch/impl/mr/collect/InputCollection.java    |   82 --
 .../crunch/impl/mr/collect/InputTable.java         |   78 -
 .../crunch/impl/mr/collect/PCollectionImpl.java    |  238 ----
 .../crunch/impl/mr/collect/PGroupedTableImpl.java  |  117 --
 .../crunch/impl/mr/collect/PTableBase.java         |  120 --
 .../crunch/impl/mr/collect/UnionCollection.java    |   78 -
 .../crunch/impl/mr/collect/UnionTable.java         |   91 --
 .../crunch/impl/mr/emit/IntermediateEmitter.java   |   46 -
 .../crunch/impl/mr/emit/MultipleOutputEmitter.java |   55 -
 .../crunch/impl/mr/emit/OutputEmitter.java         |   51 -
 .../cloudera/crunch/impl/mr/exec/CrunchJob.java    |  117 --
 .../cloudera/crunch/impl/mr/exec/MRExecutor.java   |   73 -
 .../com/cloudera/crunch/impl/mr/plan/DoNode.java   |  161 ---
 .../crunch/impl/mr/plan/JobNameBuilder.java        |   77 -
 .../cloudera/crunch/impl/mr/plan/JobPrototype.java |  221 ---
 .../crunch/impl/mr/plan/MSCROutputHandler.java     |   72 -
 .../cloudera/crunch/impl/mr/plan/MSCRPlanner.java  |  373 -----
 .../com/cloudera/crunch/impl/mr/plan/NodePath.java |  102 --
 .../crunch/impl/mr/plan/PlanningParameters.java    |   24 -
 .../crunch/impl/mr/run/CrunchCombiner.java         |   24 -
 .../crunch/impl/mr/run/CrunchInputFormat.java      |   78 -
 .../crunch/impl/mr/run/CrunchInputSplit.java       |  132 --
 .../cloudera/crunch/impl/mr/run/CrunchInputs.java  |   72 -
 .../cloudera/crunch/impl/mr/run/CrunchMapper.java  |   70 -
 .../crunch/impl/mr/run/CrunchRecordReader.java     |   77 -
 .../cloudera/crunch/impl/mr/run/CrunchReducer.java |   68 -
 .../crunch/impl/mr/run/CrunchRuntimeException.java |   26 -
 .../crunch/impl/mr/run/CrunchTaskContext.java      |   85 --
 .../cloudera/crunch/impl/mr/run/NodeContext.java   |   30 -
 .../com/cloudera/crunch/impl/mr/run/RTNode.java    |  119 --
 .../crunch/impl/mr/run/RuntimeParameters.java      |   31 -
 .../impl/mr/run/TaskAttemptContextFactory.java     |   65 -
 src/main/java/com/cloudera/crunch/io/At.java       |   86 --
 .../cloudera/crunch/io/CompositePathIterable.java  |   99 --
 .../com/cloudera/crunch/io/FileReaderFactory.java  |   24 -
 src/main/java/com/cloudera/crunch/io/From.java     |  104 --
 .../com/cloudera/crunch/io/MapReduceTarget.java    |   25 -
 .../java/com/cloudera/crunch/io/OutputHandler.java |   22 -
 .../java/com/cloudera/crunch/io/PathTarget.java    |   21 -
 .../com/cloudera/crunch/io/PathTargetImpl.java     |   53 -
 .../com/cloudera/crunch/io/ReadableSource.java     |   25 -
 .../cloudera/crunch/io/ReadableSourceTarget.java   |   26 -
 .../com/cloudera/crunch/io/SourceTargetHelper.java |   65 -
 src/main/java/com/cloudera/crunch/io/To.java       |   69 -
 .../crunch/io/avro/AvroFileReaderFactory.java      |   87 --
 .../cloudera/crunch/io/avro/AvroFileSource.java    |   52 -
 .../crunch/io/avro/AvroFileSourceTarget.java       |   31 -
 .../cloudera/crunch/io/avro/AvroFileTarget.java    |   83 --
 .../crunch/io/hbase/HBaseSourceTarget.java         |  106 --
 .../com/cloudera/crunch/io/hbase/HBaseTarget.java  |   91 --
 .../cloudera/crunch/io/impl/FileSourceImpl.java    |  101 --
 .../crunch/io/impl/FileTableSourceImpl.java        |   36 -
 .../cloudera/crunch/io/impl/FileTargetImpl.java    |  101 --
 .../com/cloudera/crunch/io/impl/InputBundle.java   |  111 --
 .../io/impl/ReadableSourcePathTargetImpl.java      |   37 -
 .../crunch/io/impl/ReadableSourceTargetImpl.java   |   36 -
 .../crunch/io/impl/SourcePathTargetImpl.java       |   41 -
 .../cloudera/crunch/io/impl/SourceTargetImpl.java  |   82 --
 .../crunch/io/impl/TableSourcePathTargetImpl.java  |   33 -
 .../crunch/io/impl/TableSourceTargetImpl.java      |   33 -
 .../com/cloudera/crunch/io/seq/SeqFileHelper.java  |   34 -
 .../crunch/io/seq/SeqFileReaderFactory.java        |   91 --
 .../com/cloudera/crunch/io/seq/SeqFileSource.java  |   45 -
 .../crunch/io/seq/SeqFileSourceTarget.java         |   35 -
 .../crunch/io/seq/SeqFileTableReaderFactory.java   |   97 --
 .../cloudera/crunch/io/seq/SeqFileTableSource.java |   55 -
 .../crunch/io/seq/SeqFileTableSourceTarget.java    |   45 -
 .../com/cloudera/crunch/io/seq/SeqFileTarget.java  |   47 -
 .../crunch/io/text/BZip2TextInputFormat.java       |  242 ----
 .../cloudera/crunch/io/text/CBZip2InputStream.java | 1084 ---------------
 .../crunch/io/text/TextFileReaderFactory.java      |   93 --
 .../cloudera/crunch/io/text/TextFileSource.java    |   73 -
 .../crunch/io/text/TextFileSourceTarget.java       |   35 -
 .../cloudera/crunch/io/text/TextFileTarget.java    |   51 -
 .../java/com/cloudera/crunch/lib/Aggregate.java    |  238 ----
 .../java/com/cloudera/crunch/lib/Cartesian.java    |  222 ---
 src/main/java/com/cloudera/crunch/lib/Cogroup.java |   88 --
 src/main/java/com/cloudera/crunch/lib/Join.java    |  148 --
 src/main/java/com/cloudera/crunch/lib/PTables.java |   92 --
 src/main/java/com/cloudera/crunch/lib/Sample.java  |   59 -
 src/main/java/com/cloudera/crunch/lib/Set.java     |  122 --
 src/main/java/com/cloudera/crunch/lib/Sort.java    |  544 --------
 .../cloudera/crunch/lib/join/FullOuterJoinFn.java  |   96 --
 .../com/cloudera/crunch/lib/join/InnerJoinFn.java  |   72 -
 .../java/com/cloudera/crunch/lib/join/JoinFn.java  |   70 -
 .../com/cloudera/crunch/lib/join/JoinUtils.java    |  124 --
 .../cloudera/crunch/lib/join/LeftOuterJoinFn.java  |   92 --
 .../com/cloudera/crunch/lib/join/MapsideJoin.java  |  143 --
 .../cloudera/crunch/lib/join/RightOuterJoinFn.java |   77 -
 .../crunch/materialize/MaterializableIterable.java |   62 -
 .../crunch/materialize/MaterializableMap.java      |   47 -
 .../java/com/cloudera/crunch/test/FileHelper.java  |   39 -
 .../com/cloudera/crunch/test/InMemoryEmitter.java  |   52 -
 .../com/cloudera/crunch/test/TestCounters.java     |   38 -
 .../java/com/cloudera/crunch/tool/CrunchTool.java  |  101 --
 .../java/com/cloudera/crunch/types/Converter.java  |   38 -
 .../cloudera/crunch/types/PGroupedTableType.java   |  122 --
 .../java/com/cloudera/crunch/types/PTableType.java |   42 -
 src/main/java/com/cloudera/crunch/types/PType.java |   80 --
 .../com/cloudera/crunch/types/PTypeFamily.java     |   77 -
 .../java/com/cloudera/crunch/types/PTypeUtils.java |   63 -
 .../com/cloudera/crunch/types/TupleFactory.java    |   95 --
 .../cloudera/crunch/types/avro/AvroDeepCopier.java |  134 --
 .../crunch/types/avro/AvroGroupedTableType.java    |  107 --
 .../crunch/types/avro/AvroInputFormat.java         |   59 -
 .../crunch/types/avro/AvroKeyConverter.java        |   64 -
 .../crunch/types/avro/AvroOutputFormat.java        |   72 -
 .../crunch/types/avro/AvroPairConverter.java       |  106 --
 .../crunch/types/avro/AvroRecordReader.java        |  116 --
 .../cloudera/crunch/types/avro/AvroTableType.java  |  164 ---
 .../com/cloudera/crunch/types/avro/AvroType.java   |  169 ---
 .../cloudera/crunch/types/avro/AvroTypeFamily.java |  166 ---
 .../crunch/types/avro/AvroUtf8InputFormat.java     |  102 --
 .../java/com/cloudera/crunch/types/avro/Avros.java |  633 ---------
 .../crunch/types/avro/ReflectDataFactory.java      |   36 -
 .../crunch/types/avro/SafeAvroSerialization.java   |  152 --
 .../types/writable/GenericArrayWritable.java       |  125 --
 .../crunch/types/writable/TextMapWritable.java     |   86 --
 .../crunch/types/writable/TupleWritable.java       |  226 ---
 .../types/writable/WritableGroupedTableType.java   |   78 -
 .../types/writable/WritablePairConverter.java      |   59 -
 .../crunch/types/writable/WritableTableType.java   |  123 --
 .../crunch/types/writable/WritableType.java        |  119 --
 .../crunch/types/writable/WritableTypeFamily.java  |  148 --
 .../types/writable/WritableValueConverter.java     |   58 -
 .../cloudera/crunch/types/writable/Writables.java  |  631 ---------
 .../java/com/cloudera/crunch/util/Collects.java    |   45 -
 .../java/com/cloudera/crunch/util/DistCache.java   |  158 ---
 src/main/java/com/cloudera/crunch/util/PTypes.java |  256 ----
 src/main/java/com/cloudera/crunch/util/Protos.java |  147 --
 src/main/java/com/cloudera/crunch/util/Tuples.java |  148 --
 src/main/java/org/apache/crunch/CombineFn.java     |  807 +++++++++++
 src/main/java/org/apache/crunch/DoFn.java          |  184 +++
 src/main/java/org/apache/crunch/Emitter.java       |   37 +
 src/main/java/org/apache/crunch/FilterFn.java      |  134 ++
 .../java/org/apache/crunch/GroupingOptions.java    |  130 ++
 src/main/java/org/apache/crunch/MapFn.java         |   41 +
 src/main/java/org/apache/crunch/PCollection.java   |  183 +++
 src/main/java/org/apache/crunch/PGroupedTable.java |   39 +
 src/main/java/org/apache/crunch/PTable.java        |  133 ++
 src/main/java/org/apache/crunch/Pair.java          |  105 ++
 src/main/java/org/apache/crunch/Pipeline.java      |  106 ++
 .../java/org/apache/crunch/PipelineResult.java     |   75 +
 src/main/java/org/apache/crunch/Source.java        |   51 +
 src/main/java/org/apache/crunch/SourceTarget.java  |   27 +
 src/main/java/org/apache/crunch/TableSource.java   |   28 +
 src/main/java/org/apache/crunch/Target.java        |   31 +
 src/main/java/org/apache/crunch/Tuple.java         |   36 +
 src/main/java/org/apache/crunch/Tuple3.java        |   96 ++
 src/main/java/org/apache/crunch/Tuple4.java        |  106 ++
 src/main/java/org/apache/crunch/TupleN.java        |   69 +
 .../java/org/apache/crunch/fn/CompositeMapFn.java  |   71 +
 .../java/org/apache/crunch/fn/ExtractKeyFn.java    |   44 +
 src/main/java/org/apache/crunch/fn/IdentityFn.java |   39 +
 src/main/java/org/apache/crunch/fn/MapKeysFn.java  |   32 +
 .../java/org/apache/crunch/fn/MapValuesFn.java     |   32 +
 src/main/java/org/apache/crunch/fn/PairMapFn.java  |   64 +
 .../org/apache/crunch/impl/mem/MemPipeline.java    |  209 +++
 .../crunch/impl/mem/collect/MemCollection.java     |  205 +++
 .../crunch/impl/mem/collect/MemGroupedTable.java   |  126 ++
 .../apache/crunch/impl/mem/collect/MemTable.java   |  146 ++
 .../java/org/apache/crunch/impl/mr/MRPipeline.java |  322 +++++
 .../crunch/impl/mr/collect/DoCollectionImpl.java   |   65 +
 .../apache/crunch/impl/mr/collect/DoTableImpl.java |   79 ++
 .../crunch/impl/mr/collect/InputCollection.java    |   85 ++
 .../apache/crunch/impl/mr/collect/InputTable.java  |   81 ++
 .../crunch/impl/mr/collect/PCollectionImpl.java    |  241 ++++
 .../crunch/impl/mr/collect/PGroupedTableImpl.java  |  120 ++
 .../apache/crunch/impl/mr/collect/PTableBase.java  |  123 ++
 .../crunch/impl/mr/collect/UnionCollection.java    |   81 ++
 .../apache/crunch/impl/mr/collect/UnionTable.java  |   94 ++
 .../crunch/impl/mr/emit/IntermediateEmitter.java   |   49 +
 .../crunch/impl/mr/emit/MultipleOutputEmitter.java |   58 +
 .../apache/crunch/impl/mr/emit/OutputEmitter.java  |   54 +
 .../org/apache/crunch/impl/mr/exec/CrunchJob.java  |  120 ++
 .../org/apache/crunch/impl/mr/exec/MRExecutor.java |   76 +
 .../org/apache/crunch/impl/mr/plan/DoNode.java     |  164 +++
 .../apache/crunch/impl/mr/plan/JobNameBuilder.java |   80 ++
 .../apache/crunch/impl/mr/plan/JobPrototype.java   |  224 +++
 .../crunch/impl/mr/plan/MSCROutputHandler.java     |   75 +
 .../apache/crunch/impl/mr/plan/MSCRPlanner.java    |  376 +++++
 .../org/apache/crunch/impl/mr/plan/NodePath.java   |  105 ++
 .../crunch/impl/mr/plan/PlanningParameters.java    |   27 +
 .../apache/crunch/impl/mr/run/CrunchCombiner.java  |   27 +
 .../crunch/impl/mr/run/CrunchInputFormat.java      |   77 +
 .../crunch/impl/mr/run/CrunchInputSplit.java       |  131 ++
 .../apache/crunch/impl/mr/run/CrunchInputs.java    |   71 +
 .../apache/crunch/impl/mr/run/CrunchMapper.java    |   73 +
 .../crunch/impl/mr/run/CrunchRecordReader.java     |   76 +
 .../apache/crunch/impl/mr/run/CrunchReducer.java   |   71 +
 .../crunch/impl/mr/run/CrunchRuntimeException.java |   43 +
 .../crunch/impl/mr/run/CrunchTaskContext.java      |   88 ++
 .../org/apache/crunch/impl/mr/run/NodeContext.java |   33 +
 .../java/org/apache/crunch/impl/mr/run/RTNode.java |  122 ++
 .../crunch/impl/mr/run/RuntimeParameters.java      |   34 +
 .../impl/mr/run/TaskAttemptContextFactory.java     |   68 +
 src/main/java/org/apache/crunch/io/At.java         |   89 ++
 .../apache/crunch/io/CompositePathIterable.java    |  102 ++
 .../org/apache/crunch/io/FileReaderFactory.java    |   27 +
 src/main/java/org/apache/crunch/io/From.java       |  107 ++
 .../java/org/apache/crunch/io/MapReduceTarget.java |   28 +
 .../java/org/apache/crunch/io/OutputHandler.java   |   25 +
 src/main/java/org/apache/crunch/io/PathTarget.java |   24 +
 .../java/org/apache/crunch/io/PathTargetImpl.java  |   70 +
 .../java/org/apache/crunch/io/ReadableSource.java  |   28 +
 .../org/apache/crunch/io/ReadableSourceTarget.java |   29 +
 .../org/apache/crunch/io/SourceTargetHelper.java   |   68 +
 src/main/java/org/apache/crunch/io/To.java         |   72 +
 .../crunch/io/avro/AvroFileReaderFactory.java      |   90 ++
 .../org/apache/crunch/io/avro/AvroFileSource.java  |   55 +
 .../crunch/io/avro/AvroFileSourceTarget.java       |   34 +
 .../org/apache/crunch/io/avro/AvroFileTarget.java  |   86 ++
 .../apache/crunch/io/hbase/HBaseSourceTarget.java  |  109 ++
 .../org/apache/crunch/io/hbase/HBaseTarget.java    |   94 ++
 .../org/apache/crunch/io/impl/FileSourceImpl.java  |  104 ++
 .../apache/crunch/io/impl/FileTableSourceImpl.java |   39 +
 .../org/apache/crunch/io/impl/FileTargetImpl.java  |  104 ++
 .../org/apache/crunch/io/impl/InputBundle.java     |  114 ++
 .../io/impl/ReadableSourcePathTargetImpl.java      |   40 +
 .../crunch/io/impl/ReadableSourceTargetImpl.java   |   39 +
 .../crunch/io/impl/SourcePathTargetImpl.java       |   44 +
 .../apache/crunch/io/impl/SourceTargetImpl.java    |   85 ++
 .../crunch/io/impl/TableSourcePathTargetImpl.java  |   36 +
 .../crunch/io/impl/TableSourceTargetImpl.java      |   36 +
 .../org/apache/crunch/io/seq/SeqFileHelper.java    |   37 +
 .../apache/crunch/io/seq/SeqFileReaderFactory.java |   94 ++
 .../org/apache/crunch/io/seq/SeqFileSource.java    |   48 +
 .../apache/crunch/io/seq/SeqFileSourceTarget.java  |   38 +
 .../crunch/io/seq/SeqFileTableReaderFactory.java   |  100 ++
 .../apache/crunch/io/seq/SeqFileTableSource.java   |   58 +
 .../crunch/io/seq/SeqFileTableSourceTarget.java    |   48 +
 .../org/apache/crunch/io/seq/SeqFileTarget.java    |   50 +
 .../crunch/io/text/BZip2TextInputFormat.java       |  242 ++++
 .../apache/crunch/io/text/CBZip2InputStream.java   | 1025 ++++++++++++++
 .../crunch/io/text/TextFileReaderFactory.java      |   96 ++
 .../org/apache/crunch/io/text/TextFileSource.java  |   76 +
 .../crunch/io/text/TextFileSourceTarget.java       |   38 +
 .../org/apache/crunch/io/text/TextFileTarget.java  |   54 +
 src/main/java/org/apache/crunch/lib/Aggregate.java |  241 ++++
 src/main/java/org/apache/crunch/lib/Cartesian.java |  225 +++
 src/main/java/org/apache/crunch/lib/Cogroup.java   |   91 ++
 src/main/java/org/apache/crunch/lib/Join.java      |  151 ++
 src/main/java/org/apache/crunch/lib/PTables.java   |   95 ++
 src/main/java/org/apache/crunch/lib/Sample.java    |   62 +
 src/main/java/org/apache/crunch/lib/Set.java       |  125 ++
 src/main/java/org/apache/crunch/lib/Sort.java      |  547 ++++++++
 .../apache/crunch/lib/join/FullOuterJoinFn.java    |   99 ++
 .../org/apache/crunch/lib/join/InnerJoinFn.java    |   75 +
 .../java/org/apache/crunch/lib/join/JoinFn.java    |   73 +
 .../java/org/apache/crunch/lib/join/JoinUtils.java |  127 ++
 .../apache/crunch/lib/join/LeftOuterJoinFn.java    |   95 ++
 .../org/apache/crunch/lib/join/MapsideJoin.java    |  160 +++
 .../apache/crunch/lib/join/RightOuterJoinFn.java   |   80 ++
 .../crunch/materialize/MaterializableIterable.java |   65 +
 .../crunch/materialize/MaterializableMap.java      |   50 +
 .../java/org/apache/crunch/test/FileHelper.java    |   42 +
 .../org/apache/crunch/test/InMemoryEmitter.java    |   55 +
 .../java/org/apache/crunch/test/TestCounters.java  |   41 +
 .../java/org/apache/crunch/tool/CrunchTool.java    |  104 ++
 .../java/org/apache/crunch/types/Converter.java    |   41 +
 .../org/apache/crunch/types/PGroupedTableType.java |  124 ++
 .../java/org/apache/crunch/types/PTableType.java   |   44 +
 src/main/java/org/apache/crunch/types/PType.java   |   82 ++
 .../java/org/apache/crunch/types/PTypeFamily.java  |   79 ++
 .../java/org/apache/crunch/types/PTypeUtils.java   |   66 +
 .../java/org/apache/crunch/types/TupleFactory.java |   98 ++
 .../apache/crunch/types/avro/AvroDeepCopier.java   |  151 ++
 .../crunch/types/avro/AvroGroupedTableType.java    |  110 ++
 .../apache/crunch/types/avro/AvroInputFormat.java  |   58 +
 .../apache/crunch/types/avro/AvroKeyConverter.java |   67 +
 .../apache/crunch/types/avro/AvroOutputFormat.java |   71 +
 .../crunch/types/avro/AvroPairConverter.java       |  109 ++
 .../apache/crunch/types/avro/AvroRecordReader.java |  115 ++
 .../apache/crunch/types/avro/AvroTableType.java    |  167 +++
 .../org/apache/crunch/types/avro/AvroType.java     |  172 +++
 .../apache/crunch/types/avro/AvroTypeFamily.java   |  169 +++
 .../crunch/types/avro/AvroUtf8InputFormat.java     |  102 ++
 .../java/org/apache/crunch/types/avro/Avros.java   |  636 +++++++++
 .../crunch/types/avro/ReflectDataFactory.java      |   39 +
 .../crunch/types/avro/SafeAvroSerialization.java   |  151 ++
 .../types/writable/GenericArrayWritable.java       |  128 ++
 .../crunch/types/writable/TextMapWritable.java     |   88 ++
 .../crunch/types/writable/TupleWritable.java       |  225 +++
 .../types/writable/WritableGroupedTableType.java   |   81 ++
 .../types/writable/WritablePairConverter.java      |   62 +
 .../crunch/types/writable/WritableTableType.java   |  126 ++
 .../apache/crunch/types/writable/WritableType.java |  122 ++
 .../crunch/types/writable/WritableTypeFamily.java  |  151 ++
 .../types/writable/WritableValueConverter.java     |   61 +
 .../apache/crunch/types/writable/Writables.java    |  634 +++++++++
 src/main/java/org/apache/crunch/util/Collects.java |   48 +
 .../java/org/apache/crunch/util/DistCache.java     |  161 +++
 src/main/java/org/apache/crunch/util/PTypes.java   |  259 ++++
 src/main/java/org/apache/crunch/util/Protos.java   |  150 ++
 src/main/java/org/apache/crunch/util/Tuples.java   |  151 ++
 .../lib/jobcontrol/CrunchControlledJob.java        |    1 -
 .../mapreduce/lib/jobcontrol/CrunchJobControl.java |    1 -
 .../lib/output/CrunchMultipleOutputs.java          |    2 +-
 src/main/resources/log4j.properties                |    2 +-
 .../java/com/cloudera/crunch/CollectionsTest.java  |  109 --
 .../java/com/cloudera/crunch/CombineFnTest.java    |  203 ---
 .../java/com/cloudera/crunch/FilterFnTest.java     |   60 -
 src/test/java/com/cloudera/crunch/MapsTest.java    |   76 -
 .../java/com/cloudera/crunch/MaterializeTest.java  |   90 --
 .../com/cloudera/crunch/MaterializeToMapTest.java  |   75 -
 .../com/cloudera/crunch/MultipleOutputTest.java    |  107 --
 .../cloudera/crunch/PCollectionGetSizeTest.java    |  137 --
 .../com/cloudera/crunch/PTableKeyValueTest.java    |   90 --
 .../java/com/cloudera/crunch/PageRankTest.java     |  163 ---
 src/test/java/com/cloudera/crunch/PairTest.java    |   63 -
 src/test/java/com/cloudera/crunch/TFIDFTest.java   |  230 ---
 .../com/cloudera/crunch/TermFrequencyTest.java     |  131 --
 .../java/com/cloudera/crunch/TextPairTest.java     |   66 -
 .../cloudera/crunch/TupleNClassCastBugTest.java    |   92 --
 src/test/java/com/cloudera/crunch/TupleTest.java   |  139 --
 .../com/cloudera/crunch/WordCountHBaseTest.java    |  203 ---
 .../java/com/cloudera/crunch/WordCountTest.java    |  169 ---
 .../com/cloudera/crunch/fn/ExtractKeyFnTest.java   |   43 -
 .../java/com/cloudera/crunch/fn/MapKeysTest.java   |   50 -
 .../java/com/cloudera/crunch/fn/MapValuesTest.java |   48 -
 .../java/com/cloudera/crunch/fn/PairMapTest.java   |   50 -
 .../com/cloudera/crunch/fn/StoreLastEmitter.java   |   38 -
 .../impl/mem/MemPipelineFileWritingTest.java       |   50 -
 .../cloudera/crunch/impl/mr/MRPipelineTest.java    |   60 -
 .../impl/mr/collect/DoCollectionImplTest.java      |  101 --
 .../crunch/impl/mr/collect/DoTableImplTest.java    |   71 -
 .../impl/mr/collect/UnionCollectionTest.java       |  144 --
 .../crunch/impl/mr/plan/JobNameBuilderTest.java    |   38 -
 .../crunch/io/CompositePathIterableTest.java       |   64 -
 .../cloudera/crunch/io/SourceTargetHelperTest.java |   42 -
 .../crunch/io/avro/AvroFileReaderFactoryTest.java  |  155 --
 .../crunch/io/avro/AvroFileSourceTargetTest.java   |  150 --
 .../crunch/io/avro/AvroFileSourceTest.java         |   79 --
 .../cloudera/crunch/io/avro/AvroReflectTest.java   |   98 --
 .../com/cloudera/crunch/lib/AggregateTest.java     |  229 ---
 .../lib/AvroIndexedRecordPartitionerTest.java      |   97 --
 .../com/cloudera/crunch/lib/AvroTypeSortTest.java  |  145 --
 .../com/cloudera/crunch/lib/CartesianTest.java     |   48 -
 .../java/com/cloudera/crunch/lib/CogroupTest.java  |  123 --
 .../java/com/cloudera/crunch/lib/SampleTest.java   |   20 -
 src/test/java/com/cloudera/crunch/lib/SetTest.java |  112 --
 .../java/com/cloudera/crunch/lib/SortTest.java     |  279 ----
 .../crunch/lib/SpecificAvroGroupByTest.java        |  136 --
 .../crunch/lib/TupleWritablePartitionerTest.java   |   53 -
 .../crunch/lib/join/FullOuterJoinTest.java         |   48 -
 .../cloudera/crunch/lib/join/InnerJoinTest.java    |   48 -
 .../com/cloudera/crunch/lib/join/JoinTester.java   |  104 --
 .../crunch/lib/join/LeftOuterJoinTest.java         |   48 -
 .../cloudera/crunch/lib/join/MapsideJoinTest.java  |  102 --
 .../crunch/lib/join/MultiAvroSchemaJoinTest.java   |  113 --
 .../crunch/lib/join/RightOuterJoinTest.java        |   48 -
 .../com/cloudera/crunch/test/CountersTest.java     |   63 -
 .../java/com/cloudera/crunch/test/Employee.java    |  218 ---
 src/test/java/com/cloudera/crunch/test/Person.java |  218 ---
 .../com/cloudera/crunch/types/PTypeUtilsTest.java  |   88 --
 .../crunch/types/avro/AvroDeepCopierTest.java      |   58 -
 .../types/avro/AvroGroupedTableTypeTest.java       |   41 -
 .../crunch/types/avro/AvroTableTypeTest.java       |   35 -
 .../cloudera/crunch/types/avro/AvroTypeTest.java   |  118 --
 .../com/cloudera/crunch/types/avro/AvrosTest.java  |  221 ---
 .../writable/WritableGroupedTableTypeTest.java     |   37 -
 .../types/writable/WritableTableTypeTest.java      |   30 -
 .../crunch/types/writable/WritableTypeTest.java    |   29 -
 .../crunch/types/writable/WritablesTest.java       |  276 ----
 .../com/cloudera/crunch/util/DistCacheTest.java    |  141 --
 .../java/org/apache/crunch/CollectionsTest.java    |  112 ++
 src/test/java/org/apache/crunch/CombineFnTest.java |  206 +++
 src/test/java/org/apache/crunch/FilterFnTest.java  |   63 +
 src/test/java/org/apache/crunch/MapsTest.java      |   93 ++
 .../java/org/apache/crunch/MaterializeTest.java    |  107 ++
 .../org/apache/crunch/MaterializeToMapTest.java    |   78 +
 .../java/org/apache/crunch/MultipleOutputTest.java |  110 ++
 .../org/apache/crunch/PCollectionGetSizeTest.java  |  154 ++
 .../java/org/apache/crunch/PTableKeyValueTest.java |  107 ++
 src/test/java/org/apache/crunch/PageRankTest.java  |  166 +++
 src/test/java/org/apache/crunch/PairTest.java      |   66 +
 src/test/java/org/apache/crunch/TFIDFTest.java     |  233 +++
 .../java/org/apache/crunch/TermFrequencyTest.java  |  134 ++
 src/test/java/org/apache/crunch/TextPairTest.java  |   69 +
 .../org/apache/crunch/TupleNClassCastBugTest.java  |   95 ++
 src/test/java/org/apache/crunch/TupleTest.java     |  142 ++
 .../java/org/apache/crunch/WordCountHBaseTest.java |  206 +++
 src/test/java/org/apache/crunch/WordCountTest.java |  172 +++
 .../org/apache/crunch/fn/ExtractKeyFnTest.java     |   46 +
 .../java/org/apache/crunch/fn/MapKeysTest.java     |   53 +
 .../java/org/apache/crunch/fn/MapValuesTest.java   |   51 +
 .../java/org/apache/crunch/fn/PairMapTest.java     |   53 +
 .../org/apache/crunch/fn/StoreLastEmitter.java     |   41 +
 .../impl/mem/MemPipelineFileWritingTest.java       |   53 +
 .../org/apache/crunch/impl/mr/MRPipelineTest.java  |   77 +
 .../impl/mr/collect/DoCollectionImplTest.java      |  118 ++
 .../crunch/impl/mr/collect/DoTableImplTest.java    |   88 ++
 .../impl/mr/collect/UnionCollectionTest.java       |  161 +++
 .../crunch/impl/mr/plan/JobNameBuilderTest.java    |   41 +
 .../crunch/io/CompositePathIterableTest.java       |   81 ++
 .../apache/crunch/io/SourceTargetHelperTest.java   |   59 +
 .../crunch/io/avro/AvroFileReaderFactoryTest.java  |  158 +++
 .../crunch/io/avro/AvroFileSourceTargetTest.java   |  153 ++
 .../apache/crunch/io/avro/AvroFileSourceTest.java  |   82 ++
 .../org/apache/crunch/io/avro/AvroReflectTest.java |  115 ++
 .../java/org/apache/crunch/lib/AggregateTest.java  |  232 +++
 .../lib/AvroIndexedRecordPartitionerTest.java      |   99 ++
 .../org/apache/crunch/lib/AvroTypeSortTest.java    |  148 ++
 .../java/org/apache/crunch/lib/CartesianTest.java  |   65 +
 .../java/org/apache/crunch/lib/CogroupTest.java    |  126 ++
 .../java/org/apache/crunch/lib/SampleTest.java     |   37 +
 src/test/java/org/apache/crunch/lib/SetTest.java   |  115 ++
 src/test/java/org/apache/crunch/lib/SortTest.java  |  282 ++++
 .../apache/crunch/lib/SpecificAvroGroupByTest.java |  139 ++
 .../crunch/lib/TupleWritablePartitionerTest.java   |   70 +
 .../apache/crunch/lib/join/FullOuterJoinTest.java  |   51 +
 .../org/apache/crunch/lib/join/InnerJoinTest.java  |   51 +
 .../org/apache/crunch/lib/join/JoinTester.java     |  107 ++
 .../apache/crunch/lib/join/LeftOuterJoinTest.java  |   51 +
 .../apache/crunch/lib/join/MapsideJoinTest.java    |  119 ++
 .../crunch/lib/join/MultiAvroSchemaJoinTest.java   |  116 ++
 .../apache/crunch/lib/join/RightOuterJoinTest.java |   51 +
 .../java/org/apache/crunch/test/CountersTest.java  |   66 +
 src/test/java/org/apache/crunch/test/Employee.java |  230 +++
 src/test/java/org/apache/crunch/test/Person.java   |  230 +++
 .../org/apache/crunch/types/PTypeUtilsTest.java    |   91 ++
 .../crunch/types/avro/AvroDeepCopierTest.java      |   75 +
 .../types/avro/AvroGroupedTableTypeTest.java       |   58 +
 .../crunch/types/avro/AvroTableTypeTest.java       |   52 +
 .../org/apache/crunch/types/avro/AvroTypeTest.java |  135 ++
 .../org/apache/crunch/types/avro/AvrosTest.java    |  224 +++
 .../writable/WritableGroupedTableTypeTest.java     |   54 +
 .../types/writable/WritableTableTypeTest.java      |   47 +
 .../crunch/types/writable/WritableTypeTest.java    |   46 +
 .../crunch/types/writable/WritablesTest.java       |  279 ++++
 .../java/org/apache/crunch/util/DistCacheTest.java |  144 ++
 464 files changed, 25817 insertions(+), 24872 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/CombineFn.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/CombineFn.java b/src/main/java/com/cloudera/crunch/CombineFn.java
deleted file mode 100644
index 9c766cc..0000000
--- a/src/main/java/com/cloudera/crunch/CombineFn.java
+++ /dev/null
@@ -1,805 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-import java.io.Serializable;
-import java.math.BigInteger;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.SortedSet;
-
-import com.cloudera.crunch.util.Tuples;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- * A special {@link DoFn} implementation that converts an {@link Iterable}
- * of values into a single value. If a {@code CombineFn} instance is used
- * on a {@link PGroupedTable}, the function will be applied to the output
- * of the map stage before the data is passed to the reducer, which can
- * improve the runtime of certain classes of jobs.
- *
- */
-public abstract class CombineFn<S, T> extends DoFn<Pair<S, Iterable<T>>, Pair<S, T>> {
-  
-  public static interface Aggregator<T> extends Serializable {
-    /**
-     * Clears the internal state of this Aggregator and prepares it for the values associated
-     * with the next key.
-     */
-    void reset();
-
-    /**
-     * Incorporate the given value into the aggregate state maintained by this instance.
-     */
-    void update(T value);
-    
-    /**
-     * Returns the current aggregated state of this instance.
-     */
-    Iterable<T> results();    
-  }
-  
-  /**
-   * Interface for constructing new aggregator instances.
-   */
-  public static interface AggregatorFactory<T> {
-    Aggregator<T> create();
-  }
-  
-  /**
-   * A {@code CombineFn} that delegates all of the actual work to an {@code Aggregator}
-   * instance.
-   */
-  public static class AggregatorCombineFn<K, V> extends CombineFn<K, V> {
-    
-    private final Aggregator<V> aggregator;
-    
-    public AggregatorCombineFn(Aggregator<V> aggregator) {
-      this.aggregator = aggregator;
-    }
-    
-    @Override
-    public void process(Pair<K, Iterable<V>> input, Emitter<Pair<K, V>> emitter) {
-      aggregator.reset();
-      for (V v : input.second()) {
-        aggregator.update(v);
-      }
-      for (V v : aggregator.results()) {
-        emitter.emit(Pair.of(input.first(), v));
-      }
-    }    
-  }
-  
-  private static abstract class TupleAggregator<T> implements Aggregator<T> {   
-    private final List<Aggregator<Object>> aggregators;
-    
-    public TupleAggregator(Aggregator<?>...aggregators) {
-      this.aggregators = Lists.newArrayList();
-      for (Aggregator<?> a : aggregators) {
-        this.aggregators.add((Aggregator<Object>) a);
-      }
-    }
-    
-    @Override
-    public void reset() {
-      for (Aggregator<?> a : aggregators) {
-        a.reset();
-      }
-    }
-    
-    protected void updateTuple(Tuple t) {
-      for (int i = 0; i < aggregators.size(); i++) {
-        aggregators.get(i).update(t.get(i));
-      }
-    }
-    
-    protected Iterable<Object> results(int index) {
-      return aggregators.get(index).results();
-    }
-  }
-  
-  public static class PairAggregator<V1, V2> extends TupleAggregator<Pair<V1, V2>> {
-    
-    public PairAggregator(Aggregator<V1> a1, Aggregator<V2> a2) {
-      super(a1, a2);
-    }
-    
-    @Override
-    public void update(Pair<V1, V2> value) {
-      updateTuple(value);
-    }
-    
-    @Override
-    public Iterable<Pair<V1, V2>> results() {
-      return new Tuples.PairIterable<V1, V2>((Iterable<V1>) results(0), (Iterable<V2>) results(1));
-    }
-  }
-  
-  public static class TripAggregator<A, B, C> extends TupleAggregator<Tuple3<A, B, C>> {
-    
-    public TripAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3) {
-      super(a1, a2, a3);
-    }
-    
-    @Override
-    public void update(Tuple3<A, B, C> value) {
-      updateTuple(value);
-    }
-    
-    @Override
-    public Iterable<Tuple3<A, B, C>> results() {
-      return new Tuples.TripIterable<A, B, C>((Iterable<A>) results(0),
-          (Iterable<B>) results(1), (Iterable<C>) results(2));
-    }
-  }
-
-  public static class QuadAggregator<A, B, C, D> extends TupleAggregator<Tuple4<A, B, C, D>> {
-    
-    public QuadAggregator(Aggregator<A> a1, Aggregator<B> a2, Aggregator<C> a3, Aggregator<D> a4) {
-      super(a1, a2, a3, a4);
-    }
-    
-    @Override
-    public void update(Tuple4<A, B, C, D> value) {
-      updateTuple(value);
-    }
-    
-    @Override
-    public Iterable<Tuple4<A, B, C, D>> results() {
-      return new Tuples.QuadIterable<A, B, C, D>((Iterable<A>) results(0),
-          (Iterable<B>) results(1), (Iterable<C>) results(2), (Iterable<D>) results(3));
-    }
-  }
-  
-  public static class TupleNAggregator extends TupleAggregator<TupleN> {
-    
-    private final int size;
-    
-    public TupleNAggregator(Aggregator<?>... aggregators) {
-      super(aggregators);
-      size = aggregators.length;
-    }
-    
-    @Override
-    public void update(TupleN value) {
-      updateTuple(value);
-    }
-
-    @Override
-    public Iterable<TupleN> results() {
-      Iterable<?>[] iterables = new Iterable[size];
-      for (int i = 0; i < size; i++) {
-        iterables[i] = results(i);
-      }
-      return new Tuples.TupleNIterable(iterables);
-    }
-    
-  }
-  
-  public static final <K, V> CombineFn<K, V> aggregator(Aggregator<V> aggregator) {
-    return new AggregatorCombineFn<K, V>(aggregator);
-  }
-  
-  public static final <K, V> CombineFn<K, V> aggregatorFactory(AggregatorFactory<V> aggregator) {
-    return new AggregatorCombineFn<K, V>(aggregator.create());
-  }
-  
-  public static final <K, V1, V2> CombineFn<K, Pair<V1, V2>> pairAggregator(
-      AggregatorFactory<V1> a1, AggregatorFactory<V2> a2) {
-    return aggregator(new PairAggregator<V1, V2>(a1.create(), a2.create()));
-  }
-  
-  public static final <K, A, B, C> CombineFn<K, Tuple3<A, B, C>> tripAggregator(
-      AggregatorFactory<A> a1, AggregatorFactory<B> a2, AggregatorFactory<C> a3) {
-    return aggregator(new TripAggregator<A, B, C>(a1.create(), a2.create(), a3.create()));
-  }
-
-  public static final <K, A, B, C, D> CombineFn<K, Tuple4<A, B, C, D>> quadAggregator(
-      AggregatorFactory<A> a1, AggregatorFactory<B> a2, AggregatorFactory<C> a3,
-      AggregatorFactory<D> a4) {
-    return aggregator(new QuadAggregator<A, B, C, D>(a1.create(), a2.create(), a3.create(),
-        a4.create()));
-  }
-
-  public static final <K> CombineFn<K, TupleN> tupleAggregator(AggregatorFactory<?>... factories) {
-    Aggregator<?>[] aggs = new Aggregator[factories.length];
-    for (int i = 0; i < aggs.length; i++) {
-      aggs[i] = factories[i].create();
-    }
-    return aggregator(new TupleNAggregator(aggs));
-  }
-  
-  public static final <K> CombineFn<K, Long> SUM_LONGS() {
-    return aggregatorFactory(SUM_LONGS);
-  }
-
-  public static final <K> CombineFn<K, Integer> SUM_INTS() {
-    return aggregatorFactory(SUM_INTS);
-  }
-
-  public static final <K> CombineFn<K, Float> SUM_FLOATS() {
-    return aggregatorFactory(SUM_FLOATS);
-  }
-
-  public static final <K> CombineFn<K, Double> SUM_DOUBLES() {
-    return aggregatorFactory(SUM_DOUBLES);
-  }
- 
-  public static final <K> CombineFn<K, BigInteger> SUM_BIGINTS() {
-    return aggregatorFactory(SUM_BIGINTS);
-  }
-  
-  public static final <K> CombineFn<K, Long> MAX_LONGS() {
-    return aggregatorFactory(MAX_LONGS);
-  }
-
-  public static final <K> CombineFn<K, Long> MAX_LONGS(int n) {
-    return aggregator(new MaxNAggregator<Long>(n));
-  }
-  
-  public static final <K> CombineFn<K, Integer> MAX_INTS() {
-    return aggregatorFactory(MAX_INTS);
-  }
-
-  public static final <K> CombineFn<K, Integer> MAX_INTS(int n) {
-    return aggregator(new MaxNAggregator<Integer>(n));
-  }
-
-  public static final <K> CombineFn<K, Float> MAX_FLOATS() {
-    return aggregatorFactory(MAX_FLOATS);
-  }
-
-  public static final <K> CombineFn<K, Float> MAX_FLOATS(int n) {
-    return aggregator(new MaxNAggregator<Float>(n));
-  }
-
-  public static final <K> CombineFn<K, Double> MAX_DOUBLES() {
-    return aggregatorFactory(MAX_DOUBLES);
-  }
-
-  public static final <K> CombineFn<K, Double> MAX_DOUBLES(int n) {
-    return aggregator(new MaxNAggregator<Double>(n));
-  }
-  
-  public static final <K> CombineFn<K, BigInteger> MAX_BIGINTS() {
-    return aggregatorFactory(MAX_BIGINTS);
-  }
-  
-  public static final <K> CombineFn<K, BigInteger> MAX_BIGINTS(int n) {
-    return aggregator(new MaxNAggregator<BigInteger>(n));
-  }
-  
-  public static final <K> CombineFn<K, Long> MIN_LONGS() {
-    return aggregatorFactory(MIN_LONGS);
-  }
-
-  public static final <K> CombineFn<K, Long> MIN_LONGS(int n) {
-    return aggregator(new MinNAggregator<Long>(n));
-  }
-
-  public static final <K> CombineFn<K, Integer> MIN_INTS() {
-    return aggregatorFactory(MIN_INTS);
-  }
-
-  public static final <K> CombineFn<K, Integer> MIN_INTS(int n) {
-    return aggregator(new MinNAggregator<Integer>(n));
-  }
-  
-  public static final <K> CombineFn<K, Float> MIN_FLOATS() {
-    return aggregatorFactory(MIN_FLOATS);
-  }
-
-  public static final <K> CombineFn<K, Float> MIN_FLOATS(int n) {
-    return aggregator(new MinNAggregator<Float>(n));
-  }
-  
-  public static final <K> CombineFn<K, Double> MIN_DOUBLES() {
-    return aggregatorFactory(MIN_DOUBLES);
-  }
-
-  public static final <K> CombineFn<K, Double> MIN_DOUBLES(int n) {
-    return aggregator(new MinNAggregator<Double>(n));
-  }
-  
-  public static final <K> CombineFn<K, BigInteger> MIN_BIGINTS() {
-    return aggregatorFactory(MIN_BIGINTS);
-  }
-  
-  public static final <K> CombineFn<K, BigInteger> MIN_BIGINTS(int n) {
-    return aggregator(new MinNAggregator<BigInteger>(n));
-  }
-  
-  public static final <K, V> CombineFn<K, V> FIRST_N(int n) {
-    return aggregator(new FirstNAggregator<V>(n));
-  }
-
-  public static final <K, V> CombineFn<K, V> LAST_N(int n) {
-    return aggregator(new LastNAggregator<V>(n));
-  }
-  
-  public static class SumLongs implements Aggregator<Long> {    
-    private long sum = 0;
-    
-    @Override
-    public void reset() {
-      sum = 0;
-    }
-
-    @Override
-    public void update(Long next) {
-      sum += next;
-    }
-    
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-  public static AggregatorFactory<Long> SUM_LONGS = new AggregatorFactory<Long>() {
-    public Aggregator<Long> create() { return new SumLongs(); }
-  };
-  
-  public static class SumInts implements Aggregator<Integer> {
-    private int sum = 0;
-    
-    @Override
-    public void reset() {
-      sum = 0;
-    }
-
-    @Override
-    public void update(Integer next) {
-      sum += next;
-    }
-    
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-  public static AggregatorFactory<Integer> SUM_INTS = new AggregatorFactory<Integer>() {
-    public Aggregator<Integer> create() { return new SumInts(); }
-  };
-  
-  public static class SumFloats implements Aggregator<Float> {    
-    private float sum = 0;
-    
-    @Override
-    public void reset() {
-      sum = 0f;
-    }
-
-    @Override
-    public void update(Float next) {
-      sum += next;
-    }
-    
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-  public static AggregatorFactory<Float> SUM_FLOATS = new AggregatorFactory<Float>() {
-    public Aggregator<Float> create() { return new SumFloats(); }
-  };
-  
-  public static class SumDoubles implements Aggregator<Double> {    
-    private double sum = 0;
-    
-    @Override
-    public void reset() {
-      sum = 0f;
-    }
-
-    @Override
-    public void update(Double next) {
-      sum += next;
-    }
-    
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-  public static AggregatorFactory<Double> SUM_DOUBLES = new AggregatorFactory<Double>() {
-    public Aggregator<Double> create() { return new SumDoubles(); }
-  };
-  
-  public static class SumBigInts implements Aggregator<BigInteger> {    
-    private BigInteger sum = BigInteger.ZERO;
-    
-    @Override
-    public void reset() {
-      sum = BigInteger.ZERO;
-    }
-
-    @Override
-    public void update(BigInteger next) {
-      sum = sum.add(next);
-    }
-    
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(sum);
-    }
-  }
-  public static AggregatorFactory<BigInteger> SUM_BIGINTS = new AggregatorFactory<BigInteger>() {
-    public Aggregator<BigInteger> create() { return new SumBigInts(); }
-  };
-  
-  public static class MaxLongs implements Aggregator<Long> {
-    private Long max = null;
-    
-    @Override
-    public void reset() {
-      max = null;
-    }
-    
-    @Override
-    public void update(Long next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(max);
-    }
-  }
-  public static AggregatorFactory<Long> MAX_LONGS = new AggregatorFactory<Long>() {
-    public Aggregator<Long> create() { return new MaxLongs(); }
-  };
-  
-  public static class MaxInts implements Aggregator<Integer> {
-    private Integer max = null;
-    
-    @Override
-    public void reset() {
-      max = null;
-    }
-    
-    @Override
-    public void update(Integer next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(max);
-    }
-  }
-  public static AggregatorFactory<Integer> MAX_INTS = new AggregatorFactory<Integer>() {
-    public Aggregator<Integer> create() { return new MaxInts(); }
-  };
-  
-  public static class MaxFloats implements Aggregator<Float> {
-    private Float max = null;
-    
-    @Override
-    public void reset() {
-      max = null;
-    }
-    
-    @Override
-    public void update(Float next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(max);
-    }
-  }
-  public static AggregatorFactory<Float> MAX_FLOATS = new AggregatorFactory<Float>() {
-    public Aggregator<Float> create() { return new MaxFloats(); }
-  };
-  
-  public static class MaxDoubles implements Aggregator<Double> {
-    private Double max = null;
-    
-    @Override
-    public void reset() {
-      max = null;
-    }
-    
-    @Override
-    public void update(Double next) {
-      if (max == null || max < next) {
-        max = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(max);
-    }
-  }
-  public static AggregatorFactory<Double> MAX_DOUBLES = new AggregatorFactory<Double>() {
-    public Aggregator<Double> create() { return new MaxDoubles(); }
-  };
-  
-  public static class MaxBigInts implements Aggregator<BigInteger> {
-    private BigInteger max = null;
-    
-    @Override
-    public void reset() {
-      max = null;
-    }
-    
-    @Override
-    public void update(BigInteger next) {
-      if (max == null || max.compareTo(next) < 0) {
-        max = next;
-      }
-    }
-    
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(max);
-    }
-  }
-  public static AggregatorFactory<BigInteger> MAX_BIGINTS = new AggregatorFactory<BigInteger>() {
-    public Aggregator<BigInteger> create() { return new MaxBigInts(); }
-  };
-  
-  public static class MinLongs implements Aggregator<Long> {
-    private Long min = null;
-    
-    @Override
-    public void reset() {
-      min = null;
-    }
-    
-    @Override
-    public void update(Long next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Long> results() {
-      return ImmutableList.of(min);
-    }
-  }
-  public static AggregatorFactory<Long> MIN_LONGS = new AggregatorFactory<Long>() {
-    public Aggregator<Long> create() { return new MinLongs(); }
-  };
-  
-  public static class MinInts implements Aggregator<Integer> {    
-    private Integer min = null;
-    
-    @Override
-    public void reset() {
-      min = null;
-    }
-    
-    @Override
-    public void update(Integer next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Integer> results() {
-      return ImmutableList.of(min);
-    }
-  }
-  public static AggregatorFactory<Integer> MIN_INTS = new AggregatorFactory<Integer>() {
-    public Aggregator<Integer> create() { return new MinInts(); }
-  };
-  
-  public static class MinFloats implements Aggregator<Float> {
-    private Float min = null;
-    
-    @Override
-    public void reset() {
-      min = null;
-    }
-    
-    @Override
-    public void update(Float next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Float> results() {
-      return ImmutableList.of(min);
-    }
-  }
-  public static AggregatorFactory<Float> MIN_FLOATS = new AggregatorFactory<Float>() {
-    public Aggregator<Float> create() { return new MinFloats(); }
-  };
-  
-  public static class MinDoubles implements Aggregator<Double> {
-    private Double min = null;
-    
-    @Override
-    public void reset() {
-      min = null;
-    }
-    
-    @Override
-    public void update(Double next) {
-      if (min == null || min > next) {
-        min = next;
-      }
-    }
-    
-    @Override
-    public Iterable<Double> results() {
-      return ImmutableList.of(min);
-    }
-  }
-  public static AggregatorFactory<Double> MIN_DOUBLES = new AggregatorFactory<Double>() {
-    public Aggregator<Double> create() { return new MinDoubles(); }
-  };
-
-  public static class MinBigInts implements Aggregator<BigInteger> {
-    private BigInteger min = null;
-    
-    @Override
-    public void reset() {
-      min = null;
-    }
-    
-    @Override
-    public void update(BigInteger next) {
-      if (min == null || min.compareTo(next) > 0) {
-        min = next;
-      }
-    }
-    
-    @Override
-    public Iterable<BigInteger> results() {
-      return ImmutableList.of(min);
-    }
-  }
-  public static AggregatorFactory<BigInteger> MIN_BIGINTS = new AggregatorFactory<BigInteger>() {
-    public Aggregator<BigInteger> create() { return new MinBigInts(); }
-  };
-  
-  public static class MaxNAggregator<V extends Comparable<V>> implements Aggregator<V> {
-    private final int arity;
-    private transient SortedSet<V> elements;
-
-    public MaxNAggregator(int arity) {
-      this.arity = arity;
-    }
-
-    @Override
-    public void reset() {
-      if (elements == null) {
-        elements = Sets.newTreeSet();
-      } else {
-        elements.clear();
-      }
-    }
-    
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      } else if (value.compareTo(elements.first()) > 0) {
-        elements.remove(elements.first());
-        elements.add(value);
-      }
-    }
-    
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-  
-  public static class MinNAggregator<V extends Comparable<V>> implements Aggregator<V> {
-    private final int arity;
-    private transient SortedSet<V> elements;
-    
-    public MinNAggregator(int arity) {
-      this.arity = arity;
-    }
-
-    @Override
-    public void reset() {
-      if (elements == null) {
-        elements = Sets.newTreeSet();
-      } else {
-        elements.clear();
-      }
-    }
-    
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      } else if (value.compareTo(elements.last()) < 0) {
-        elements.remove(elements.last());
-        elements.add(value);
-      }
-    }
-    
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-  
-  public static class FirstNAggregator<V> implements Aggregator<V> {
-    private final int arity;
-    private final List<V> elements;
-    
-    public FirstNAggregator(int arity) {
-      this.arity = arity;
-      this.elements = Lists.newArrayList();
-    }
-
-    @Override
-    public void reset() {
-      elements.clear();
-    }
-    
-    @Override
-    public void update(V value) {
-      if (elements.size() < arity) {
-        elements.add(value);
-      }
-    }
-    
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-
-  public static class LastNAggregator<V> implements Aggregator<V> {
-    private final int arity;
-    private final LinkedList<V> elements;
-    
-    public LastNAggregator(int arity) {
-      this.arity = arity;
-      this.elements = Lists.newLinkedList();
-    }
-
-    @Override
-    public void reset() {
-      elements.clear();
-    }
-    
-    @Override
-    public void update(V value) {
-      elements.add(value);
-      if (elements.size() == arity + 1) {
-        elements.removeFirst();
-      }
-    }
-    
-    @Override
-    public Iterable<V> results() {
-      return ImmutableList.copyOf(elements);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/DoFn.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/DoFn.java b/src/main/java/com/cloudera/crunch/DoFn.java
deleted file mode 100644
index b1bbb73..0000000
--- a/src/main/java/com/cloudera/crunch/DoFn.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-import java.io.Serializable;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-
-import com.cloudera.crunch.test.TestCounters;
-
-/**
- * Base class for all data processing functions in Crunch.
- * 
- * <p>Note that all {@code DoFn} instances implement {@link Serializable},
- * and thus all of their non-transient member variables must implement
- * {@code Serializable} as well. If your DoFn depends on non-serializable
- * classes for data processing, they may be declared as {@code transient}
- * and initialized in the DoFn's {@code initialize} method.
- *
- */
-public abstract class DoFn<S, T> implements Serializable {
-  private transient TaskInputOutputContext<?, ?, ?, ?> context;
-  private transient Configuration testConf;
-  private transient String internalStatus;
-  
-  /**
-   * Called during the job planning phase. Subclasses may override
-   * this method in order to modify the configuration of the Job
-   * that this DoFn instance belongs to.
-   * 
-   * @param conf The Configuration instance for the Job.
-   */
-  public void configure(Configuration conf) {  
-  }
-  
-  /**
-   * Processes the records from a {@link PCollection}.
-   * 
-   * <br/>
-   * <br/>
-   * <b>Note:</b> Crunch can reuse a single input record object whose content
-   * changes on each {@link #process(Object, Emitter)} method call. This
-   * functionality is imposed by Hadoop's <a href="http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/Reducer.html">Reducer</a> implementation: 
-   * <i>The framework will reuse the key and value objects that are passed into the reduce, therefore the application
-   * should clone the objects they want to keep a copy of.</i>
-   * 
-   * @param input
-   *          The input record.
-   * @param emitter
-   *          The emitter to send the output to
-   */
-  public abstract void process(S input, Emitter<T> emitter);
-
-  /**
-   * Called during the setup of the MapReduce job this {@code DoFn}
-   * is associated with. Subclasses may override this method to
-   * do appropriate initialization.
-   */
-  public void initialize() {
-  }
-
-  /**
-   * Called during the cleanup of the MapReduce job this {@code DoFn}
-   * is associated with. Subclasses may override this method to do
-   * appropriate cleanup.
-   * 
-   * @param emitter The emitter that was used for output
-   */
-  public void cleanup(Emitter<T> emitter) {
-  }
-
-  /**
-   * Called during setup to pass the {@link TaskInputOutputContext} to
-   * this {@code DoFn} instance.
-   */
-  public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
-    this.context = context;
-    initialize();
-  }
-
-  /**
-   * Sets a {@code Configuration} instance to be used during unit tests.
-   * @param conf The Configuration instance.
-   */
-  public void setConfigurationForTest(Configuration conf) {
-    this.testConf = conf;
-  }
-  
-  /**
-   * Returns an estimate of how applying this function to a {@link PCollection}
-   * will cause it to change in side. The optimizer uses these estimates to
-   * decide where to break up dependent MR jobs into separate Map and Reduce
-   * phases in order to minimize I/O.
-   * 
-   * <p>
-   * Subclasses of {@code DoFn} that will substantially alter the size of the
-   * resulting {@code PCollection} should override this method.
-   */
-  public float scaleFactor() {
-    return 1.2f;
-  }
-  
-  protected TaskInputOutputContext<?, ?, ?, ?> getContext() {
-    return context;
-  }
-  
-  protected Configuration getConfiguration() {
-    if (context != null) {
-      return context.getConfiguration();
-    } else if (testConf != null) {
-      return testConf;
-    }
-    return null;
-  }
-  
-  protected Counter getCounter(Enum<?> counterName) {
-    if (context == null) {
-      return TestCounters.getCounter(counterName);
-    }
-    return context.getCounter(counterName);
-  }
-  
-  protected Counter getCounter(String groupName, String counterName) {
-    if (context == null) {
-      return TestCounters.getCounter(groupName, counterName);
-    }
-    return context.getCounter(groupName, counterName);
-  }
-  
-  protected void increment(Enum<?> counterName) {
-    increment(counterName, 1);
-  }
-  
-  protected void increment(Enum<?> counterName, long value) {
-    getCounter(counterName).increment(value);
-  }
-  
-  protected void progress() {
-    if (context != null) {
-      context.progress();
-    }
-  }
-  
-  protected TaskAttemptID getTaskAttemptID() {
-    if (context != null) {
-      return context.getTaskAttemptID();
-    } else {
-      return new TaskAttemptID();
-    }
-  }
-  
-  protected void setStatus(String status) {
-    if (context != null) {
-      context.setStatus(status);
-    }
-    this.internalStatus = status;
-  }
-  
-  protected String getStatus() {
-    if (context != null) {
-      return context.getStatus();
-    }
-    return internalStatus;
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/Emitter.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/Emitter.java b/src/main/java/com/cloudera/crunch/Emitter.java
deleted file mode 100644
index a968e74..0000000
--- a/src/main/java/com/cloudera/crunch/Emitter.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-/**
- * Interface for writing outputs from a {@link DoFn}.
- *
- */
-public interface Emitter<T> {
-  /**
-   * Write the emitted value to the next stage of the pipeline.
-   * 
-   * @param emitted The value to write
-   */
-  void emit(T emitted);
-
-  /**
-   * Flushes any values cached by this emitter. Called during the
-   * cleanup stage.
-   */
-  void flush();
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/FilterFn.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/FilterFn.java b/src/main/java/com/cloudera/crunch/FilterFn.java
deleted file mode 100644
index 5fa7d75..0000000
--- a/src/main/java/com/cloudera/crunch/FilterFn.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-import java.util.List;
-
-import com.google.common.collect.ImmutableList;
-
-/**
- * A {@link DoFn} for the common case of filtering the members of
- * a {@link PCollection} based on a boolean condition.
- *
- */
-public abstract class FilterFn<T> extends DoFn<T, T> {
-
-  /**
-   * If true, emit the given record.
-   */
-  public abstract boolean accept(T input);
-
-  @Override
-  public void process(T input, Emitter<T> emitter) {
-    if (accept(input)) {
-      emitter.emit(input);
-    }
-  }
-
-  @Override
-  public float scaleFactor() {
-    return 0.5f;
-  }
-  
-  public static <S> FilterFn<S> and(FilterFn<S>...fns) {
-    return new AndFn<S>(fns);
-  }
-  
-  public static class AndFn<S> extends FilterFn<S> {
-    
-    private final List<FilterFn<S>> fns;
-    
-    public AndFn(FilterFn<S>... fns) {
-      this.fns = ImmutableList.<FilterFn<S>>copyOf(fns);
-    }
-    
-    @Override
-    public boolean accept(S input) {
-      for (FilterFn<S> fn : fns) {
-        if (!fn.accept(input)) {
-          return false;
-        }
-      }
-      return true;
-    }
-    
-    @Override
-    public float scaleFactor() {
-      float scaleFactor = 1.0f;
-      for (FilterFn<S> fn : fns) {
-        scaleFactor *= fn.scaleFactor();
-      }
-      return scaleFactor;
-    }    
-  }
-  
-  public static <S> FilterFn<S> or(FilterFn<S>...fns) {
-    return new OrFn<S>(fns);
-  }
-  
-  public static class OrFn<S> extends FilterFn<S> {
-    
-    private final List<FilterFn<S>> fns;
-    
-    public OrFn(FilterFn<S>... fns) {
-      this.fns = ImmutableList.<FilterFn<S>>copyOf(fns);
-    }
-    
-    @Override
-    public boolean accept(S input) {
-      for (FilterFn<S> fn : fns) {
-        if (fn.accept(input)) {
-          return true;
-        }
-      }
-      return false;
-    }
-    
-    @Override
-    public float scaleFactor() {
-      float scaleFactor = 0.0f;
-      for (FilterFn<S> fn : fns) {
-        scaleFactor += fn.scaleFactor();
-      }
-      return Math.min(1.0f, scaleFactor);
-    }    
-  }
-  
-  public static <S> FilterFn<S> not(FilterFn<S> fn) {
-    return new NotFn<S>(fn);
-  }
-  
-  public static class NotFn<S> extends FilterFn<S> {
-    
-    private final FilterFn<S> base;
-    
-    public NotFn(FilterFn<S> base) {
-      this.base = base;
-    }
-    
-    @Override
-    public boolean accept(S input) {
-      return !base.accept(input);
-    }
-    
-    @Override
-    public float scaleFactor() {
-      return 1.0f - base.scaleFactor();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/GroupingOptions.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/GroupingOptions.java b/src/main/java/com/cloudera/crunch/GroupingOptions.java
deleted file mode 100644
index 43e19ca..0000000
--- a/src/main/java/com/cloudera/crunch/GroupingOptions.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * Options that can be passed to a {@code groupByKey} operation in order to exercise
- * finer control over how the partitioning, grouping, and sorting of keys is
- * performed.
- *
- */
-public class GroupingOptions {
-
-  private final Class<? extends Partitioner> partitionerClass;
-  private final Class<? extends RawComparator> groupingComparatorClass;
-  private final Class<? extends RawComparator> sortComparatorClass;
-  private final int numReducers;
-
-  private GroupingOptions(Class<? extends Partitioner> partitionerClass,
-      Class<? extends RawComparator> groupingComparatorClass,
-      Class<? extends RawComparator> sortComparatorClass, int numReducers) {
-    this.partitionerClass = partitionerClass;
-    this.groupingComparatorClass = groupingComparatorClass;
-    this.sortComparatorClass = sortComparatorClass;
-    this.numReducers = numReducers;
-  }
-
-  public int getNumReducers() {
-    return numReducers;
-  }
-  
-  public Class<? extends RawComparator> getSortComparatorClass() {
-    return sortComparatorClass;
-  }
-  
-  public void configure(Job job) {
-    if (partitionerClass != null) {
-      job.setPartitionerClass(partitionerClass);
-    }
-    if (groupingComparatorClass != null) {
-      job.setGroupingComparatorClass(groupingComparatorClass);
-    }
-    if (sortComparatorClass != null) {
-      job.setSortComparatorClass(sortComparatorClass);
-    }
-    if (numReducers > 0) {
-      job.setNumReduceTasks(numReducers);
-    }
-  }
-
-  public boolean isCompatibleWith(GroupingOptions other) {
-    if (partitionerClass != other.partitionerClass) {
-      return false;
-    }
-    if (groupingComparatorClass != other.groupingComparatorClass) {
-      return false;
-    }
-    if (sortComparatorClass != other.sortComparatorClass) {
-      return false;
-    }
-    return true;
-  }
-
-  public static Builder builder() {
-    return new Builder();
-  }
-
-  /**
-   * Builder class for creating {@code GroupingOptions} instances.
-   *
-   */
-  public static class Builder {
-    private Class<? extends Partitioner> partitionerClass;
-    private Class<? extends RawComparator> groupingComparatorClass;
-    private Class<? extends RawComparator> sortComparatorClass;
-    private int numReducers;
-
-    public Builder() {
-    }
-
-    public Builder partitionerClass(
-        Class<? extends Partitioner> partitionerClass) {
-      this.partitionerClass = partitionerClass;
-      return this;
-    }
-
-    public Builder groupingComparatorClass(
-        Class<? extends RawComparator> groupingComparatorClass) {
-      this.groupingComparatorClass = groupingComparatorClass;
-      return this;
-    }
-
-    public Builder sortComparatorClass(
-        Class<? extends RawComparator> sortComparatorClass) {
-      this.sortComparatorClass = sortComparatorClass;
-      return this;
-    }
-
-    public Builder numReducers(int numReducers) {
-      if (numReducers <= 0) {
-        throw new IllegalArgumentException("Invalid number of reducers: " + numReducers);
-      }
-      this.numReducers = numReducers;
-      return this;
-    }
-
-    public GroupingOptions build() {
-      return new GroupingOptions(partitionerClass, groupingComparatorClass,
-          sortComparatorClass, numReducers);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/MapFn.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/MapFn.java b/src/main/java/com/cloudera/crunch/MapFn.java
deleted file mode 100644
index 8c11fa6..0000000
--- a/src/main/java/com/cloudera/crunch/MapFn.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-/**
- * A {@link DoFn} for the common case of emitting exactly one value
- * for each input record.
- *
- */
-public abstract class MapFn<S, T> extends DoFn<S, T> {
-  
-  /**
-   * Maps the given input into an instance of the output type.
-   */
-  public abstract T map(S input);
-
-  @Override
-  public void process(S input, Emitter<T> emitter) {
-    emitter.emit(map(input));
-  }
-
-  @Override
-  public float scaleFactor() {
-    return 1.0f;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/PCollection.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/PCollection.java b/src/main/java/com/cloudera/crunch/PCollection.java
deleted file mode 100644
index 6b84297..0000000
--- a/src/main/java/com/cloudera/crunch/PCollection.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-import com.cloudera.crunch.types.PTableType;
-import com.cloudera.crunch.types.PType;
-import com.cloudera.crunch.types.PTypeFamily;
-
-/**
- * A representation of an immutable, distributed collection of elements
- * that is the fundamental target of computations in Crunch.
- *
- */
-public interface PCollection<S> {
-  /**
-   * Returns the {@code Pipeline} associated with this PCollection.
-   */
-  Pipeline getPipeline();
-  
-  /**
-   * Returns a {@code PCollection} instance that acts as the union
-   * of this {@code PCollection} and the input {@code PCollection}s.
-   */
-  PCollection<S> union(PCollection<S>... collections);
-
-  /**
-   * Applies the given doFn to the elements of this {@code PCollection} and
-   * returns a new {@code PCollection} that is the output of this processing.
-   * 
-   * @param doFn The {@code DoFn} to apply
-   * @param type The {@link PType} of the resulting {@code PCollection}
-   * @return a new {@code PCollection}
-   */
-  <T> PCollection<T> parallelDo(DoFn<S, T> doFn, PType<T> type);
-  
-  /**
-   * Applies the given doFn to the elements of this {@code PCollection} and
-   * returns a new {@code PCollection} that is the output of this processing.
-   * 
-   * @param name An identifier for this processing step, useful for debugging
-   * @param doFn The {@code DoFn} to apply
-   * @param type The {@link PType} of the resulting {@code PCollection}
-   * @return a new {@code PCollection}
-   */
-  <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type);
-
-  /**
-   * Similar to the other {@code parallelDo} instance, but returns a
-   * {@code PTable} instance instead of a {@code PCollection}.
-   * 
-   * @param doFn The {@code DoFn} to apply
-   * @param type The {@link PTableType} of the resulting {@code PTable}
-   * @return a new {@code PTable}
-   */
-  <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type);
-  
-  /**
-   * Similar to the other {@code parallelDo} instance, but returns a
-   * {@code PTable} instance instead of a {@code PCollection}.
-   * 
-   * @param name An identifier for this processing step
-   * @param doFn The {@code DoFn} to apply
-   * @param type The {@link PTableType} of the resulting {@code PTable}
-   * @return a new {@code PTable}
-   */
-  <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn,
-      PTableType<K, V> type);
-
-  /**
-   * Write the contents of this {@code PCollection} to the given {@code Target},
-   * using the storage format specified by the target.
-   * 
-   * @param target The target to write to
-   */
-  PCollection<S> write(Target target);
-  
-  /**
-   * Returns a reference to the data set represented by this PCollection that
-   * may be used by the client to read the data locally.
-   */
-  Iterable<S> materialize();
-  
-  /**
-   * Returns the {@code PType} of this {@code PCollection}.
-   */
-  PType<S> getPType();
-
-  /**
-   * Returns the {@code PTypeFamily} of this {@code PCollection}.
-   */
-  PTypeFamily getTypeFamily();
-
-  /**
-   * Returns the size of the data represented by this {@code PCollection} in bytes.
-   */
-  long getSize();
-
-  /**
-   * Returns a shorthand name for this PCollection.
-   */
-  String getName();
-  
-  /**
-   * Apply the given filter function to this instance and return the
-   * resulting {@code PCollection}.
-   */
-  PCollection<S> filter(FilterFn<S> filterFn);
-  
-  /**
-   * Apply the given filter function to this instance and return the
-   * resulting {@code PCollection}.
-   * 
-   * @param name An identifier for this processing step
-   * @param filterFn The {@code FilterFn} to apply
-   */
-  PCollection<S> filter(String name, FilterFn<S> filterFn);
-  
-  /**
-   * Apply the given map function to each element of this instance in order
-   * to create a {@code PTable}.
-   */
-  <K> PTable<K, S> by(MapFn<S, K> extractKeyFn, PType<K> keyType);
- 
-  /**
-   * Apply the given map function to each element of this instance in order
-   * to create a {@code PTable}.
-   *   
-   * @param name An identifier for this processing step
-   * @param extractKeyFn The {@code MapFn} to apply
-   */
-  <K> PTable<K, S> by(String name, MapFn<S, K> extractKeyFn, PType<K> keyType);
-  
-  /**
-   * Returns a {@code PCollection} instance that contains all of the elements
-   * of this instance in sorted order.
-   */
-  PCollection<S> sort(boolean ascending);
-  
-  /**
-   * Returns a {@code PTable} instance that contains the counts of each unique
-   * element of this PCollection.
-   */
-  PTable<S, Long> count();
-  
-  /**
-   * Returns a {@code PCollection} made up of only the maximum element of this
-   * instance.
-   */
-  PCollection<S> max();
-  
-  /**
-   * Returns a {@code PCollection} made up of only the minimum element of this
-   * instance.
-   */
-  PCollection<S> min();
-  
-  /**
-   * Randomly sample items from this PCollection instance with the given
-   * probability of an item being accepted.
-   */
-  PCollection<S> sample(double acceptanceProbability);
-  
-  /**
-   * Randomly sample items from this PCollection instance with the given
-   * probability of an item being accepted and using the given seed.
-   */
-  PCollection<S> sample(double acceptanceProbability, long seed);
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/PGroupedTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/PGroupedTable.java b/src/main/java/com/cloudera/crunch/PGroupedTable.java
deleted file mode 100644
index a980853..0000000
--- a/src/main/java/com/cloudera/crunch/PGroupedTable.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-/**
- * The Crunch representation of a grouped {@link PTable}.
- *
- */
-public interface PGroupedTable<K, V> extends PCollection<Pair<K, Iterable<V>>> {
-  /**
-   * Combines the values of this grouping using the given {@code CombineFn}.
-   * 
-   * @param combineFn The combiner function
-   * @return A {@code PTable} where each key has a single value
-   */
-  PTable<K, V> combineValues(CombineFn<K, V> combineFn);
-
-  /**
-   * Convert this grouping back into a multimap.
-   * 
-   * @return an ungrouped version of the data in this {@code PGroupedTable}.
-   */
-  PTable<K, V> ungroup();
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/PTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/PTable.java b/src/main/java/com/cloudera/crunch/PTable.java
deleted file mode 100644
index b622a3f..0000000
--- a/src/main/java/com/cloudera/crunch/PTable.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-import java.util.Collection;
-import java.util.Map;
-
-import com.cloudera.crunch.types.PTableType;
-import com.cloudera.crunch.types.PType;
-
-/**
- * A sub-interface of {@code PCollection} that represents an immutable,
- * distributed multi-map of keys and values.
- *
- */
-public interface PTable<K, V> extends PCollection<Pair<K, V>> {
-  /**
-   * Returns a {@code PTable} instance that acts as the union
-   * of this {@code PTable} and the input {@code PTable}s.
-   */
-  PTable<K, V> union(PTable<K, V>... others);
-
-  /**
-   * Performs a grouping operation on the keys of this table.
-   * @return a {@code PGroupedTable} instance that represents the grouping
-   */
-  PGroupedTable<K, V> groupByKey();
-
-  /**
-   * Performs a grouping operation on the keys of this table, using the given
-   * number of partitions.
-   * 
-   * @param numPartitions The number of partitions for the data.
-   * @return a {@code PGroupedTable} instance that represents this grouping
-   */
-  PGroupedTable<K, V> groupByKey(int numPartitions);
-  
-  /**
-   * Performs a grouping operation on the keys of this table, using the
-   * additional {@code GroupingOptions} to control how the grouping is
-   * executed.
-   * 
-   * @param options The grouping options to use
-   * @return a {@code PGroupedTable} instance that represents the grouping
-   */
-  PGroupedTable<K, V> groupByKey(GroupingOptions options);
-
-  /**
-   * Writes this {@code PTable} to the given {@code Target}.
-   */
-  PTable<K, V> write(Target target);
-  
-  /**
-   * Returns the {@code PTableType} of this {@code PTable}.
-   */
-  PTableType<K, V> getPTableType();
-  
-  /**
-   * Returns the {@code PType} of the key.
-   */
-  PType<K> getKeyType();
-
-  /**
-   * Returns the {@code PType} of the value.
-   */
-  PType<V> getValueType();
-  
-  /**
-   * Aggregate all of the values with the same key into a single
-   * key-value pair in the returned PTable.
-   */
-  PTable<K, Collection<V>> collectValues();
-  
-  /**
-   * Returns a PTable made up of the pairs in this PTable with the
-   * largest value field.
-   * @param count The number of pairs to return
-   */
-  PTable<K, V> top(int count);
-  
-  /**
-   * Returns a PTable made up of the pairs in this PTable with the
-   * smallest value field.
-   * @param count The number of pairs to return
-   */
-  PTable<K, V> bottom(int count);
-  
-  /**
-   * Perform an inner join on this table and the one passed in as
-   * an argument on their common keys.
-   */
-  <U> PTable<K, Pair<V, U>> join(PTable<K, U> other);
-  
-  /**
-   * Co-group operation with the given table on common keys.
-   */
-  <U> PTable<K, Pair<Collection<V>, Collection<U>>> cogroup(PTable<K, U> other);
-
-  /**
-   * Returns a {@link PCollection} made up of the keys in this PTable.
-   */  
-  PCollection<K> keys();
-  
-  /**
-   * Returns a {@link PCollection} made up of the values in this PTable.
-   */
-  PCollection<V> values();
-  
-  /**
-   * Returns a Map<K, V> made up of the keys and values in this PTable.
-   * <p>
-   * <b>Note:</b> The contents of the returned map may not be exactly the same
-   * as this PTable, as a PTable is a multi-map (i.e. can contain multiple
-   * values for a single key).
-   */
-  Map<K, V> materializeToMap();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/Pair.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/Pair.java b/src/main/java/com/cloudera/crunch/Pair.java
deleted file mode 100644
index f480ea2..0000000
--- a/src/main/java/com/cloudera/crunch/Pair.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-
-package com.cloudera.crunch;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-
-/**
- * A convenience class for two-element {@link Tuple}s.
- */
-public class Pair<K, V> implements Tuple, Comparable<Pair<K, V>> {
-
-  private final K first;
-  private final V second;
-
-  public static <T, U> Pair<T, U> of(T first, U second) {
-    return new Pair<T, U>(first, second);
-  }
-
-  public Pair(K first, V second) {
-    this.first = first;
-    this.second = second;
-  }
-
-  public K first() {
-    return first;
-  }
-
-  public V second() {
-    return second;
-  }
-
-  public Object get(int index) {
-    switch (index) {
-    case 0:
-      return first;
-    case 1:
-      return second;
-    default:
-      throw new ArrayIndexOutOfBoundsException();
-    }
-  }
-
-  public int size() {
-    return 2;
-  }
-  
-  @Override
-  public int hashCode() {
-    HashCodeBuilder hcb = new HashCodeBuilder();
-    return hcb.append(first).append(second).toHashCode();
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    Pair<?, ?> other = (Pair<?, ?>) obj;
-    return (first == other.first || (first != null && first.equals(other.first))) &&
-    	(second == other.second || (second != null && second.equals(other.second)));
-  }
-
-  @Override
-  public String toString() {
-	StringBuilder sb = new StringBuilder("[");
-	sb.append(first).append(",").append(second).append("]");
-	return sb.toString();
-  }
-
-  private int cmp(Object lhs, Object rhs) {
-    if (lhs == rhs) {
-      return 0;
-    } else if (lhs != null && Comparable.class.isAssignableFrom(lhs.getClass())) {
-      return ((Comparable) lhs).compareTo(rhs);
-    }
-    return (lhs == null ? 0 : lhs.hashCode()) - (rhs == null ? 0 : rhs.hashCode());
-  }
-  
-  @Override  
-  public int compareTo(Pair<K, V> o) {
-    int diff = cmp(first, o.first);
-    if (diff == 0) {
-      diff = cmp(second, o.second);
-    }
-    return diff;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/5accc9ac/src/main/java/com/cloudera/crunch/Pipeline.java
----------------------------------------------------------------------
diff --git a/src/main/java/com/cloudera/crunch/Pipeline.java b/src/main/java/com/cloudera/crunch/Pipeline.java
deleted file mode 100644
index 4a233a9..0000000
--- a/src/main/java/com/cloudera/crunch/Pipeline.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
- *
- * Cloudera, Inc. licenses this file to you under the Apache License,
- * Version 2.0 (the "License"). You may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for
- * the specific language governing permissions and limitations under the
- * License.
- */
-package com.cloudera.crunch;
-
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * Manages the state of a pipeline execution.
- * 
- */
-public interface Pipeline {
-  
-  /**
-   * Set the {@code Configuration} to use with this pipeline.
-   */
-  void setConfiguration(Configuration conf);
-  
-  /**
-   * Returns the name of this pipeline.
-   * @return Name of the pipeline
-   */
-  String getName();
-  
-  /**
-   * Returns the {@code Configuration} instance associated with this pipeline.
-   */
-  Configuration getConfiguration();
-  
-  /**
-   * Converts the given {@code Source} into a {@code PCollection} that is
-   * available to jobs run using this {@code Pipeline} instance.
-   * 
-   * @param source The source of data
-   * @return A PCollection that references the given source
-   */
-  <T> PCollection<T> read(Source<T> source);
-
-  /**
-   * A version of the read method for {@code TableSource} instances that
-   * map to {@code PTable}s.
-   * @param tableSource The source of the data
-   * @return A PTable that references the given source
-   */
-  <K, V> PTable<K, V> read(TableSource<K, V> tableSource);
-  
-  /**
-   * Write the given collection to the given target on the next
-   * pipeline run.
-   * 
-   * @param collection The collection
-   * @param target The output target
-   */
-  void write(PCollection<?> collection, Target target);
-
-  /**
-   * Create the given PCollection and read the data it contains
-   * into the returned Collection instance for client use.
-   *
-   * @param pcollection The PCollection to materialize
-   * @return the data from the PCollection as a read-only Collection
-   */
-  <T> Iterable<T> materialize(PCollection<T> pcollection);
-  
-  /**
-   * Constructs and executes a series of MapReduce jobs in order
-   * to write data to the output targets.
-   */
-  PipelineResult run();
-
-  /**
-   * Run any remaining jobs required to generate outputs and then
-   * clean up any intermediate data files that were created in
-   * this run or previous calls to {@code run}.
-   */
-  PipelineResult done();
-
-  /**
-   * A convenience method for reading a text file.
-   */
-  PCollection<String> readTextFile(String pathName);
-
-  /**
-   * A convenience method for writing a text file.
-   */
-  <T> void writeTextFile(PCollection<T> collection, String pathName);
-  
-  /**
-   * Turn on debug logging for jobs that are run from this pipeline.
-   */
-  void enableDebug();
-}


Mime
View raw message