crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jwi...@apache.org
Subject svn commit: r1635033 - /crunch/site/trunk/content/user-guide.mdtext
Date Wed, 29 Oct 2014 03:38:33 GMT
Author: jwills
Date: Wed Oct 29 03:38:33 2014
New Revision: 1635033

URL: http://svn.apache.org/r1635033
Log:
Battling Markdown, Take 3

Modified:
    crunch/site/trunk/content/user-guide.mdtext

Modified: crunch/site/trunk/content/user-guide.mdtext
URL: http://svn.apache.org/viewvc/crunch/site/trunk/content/user-guide.mdtext?rev=1635033&r1=1635032&r2=1635033&view=diff
==============================================================================
--- crunch/site/trunk/content/user-guide.mdtext (original)
+++ crunch/site/trunk/content/user-guide.mdtext Wed Oct 29 03:38:33 2014
@@ -478,77 +478,73 @@ can be used to kick off a shuffle on the
   }
 </pre>
 
-If you find yourself in a situation where you have a PCollection&lt;Pair&lt;K, V&gt;&gt;
and you need a PTable&lt;K, V&gt;, the
+If you find yourself in a situation where you have a `PCollection<Pair<K, V>>`
and you need a `PTable<K, V>`, the
 [PTables](apidocs/0.10.0/org/apache/crunch/lib/PTables.html) library class has methods that
will do the conversion for you.
 
 Let's look at some more example PTypes created using the common primitive and collection
types. For most of your pipelines,
 you will use one type family exclusively, and so you can cut down on some of the boilerplate
in your classes by importing
 all of the methods from the `Writables` or `Avros` classes into your class:
 
-<pre>
-// Import all of the PType factory methods from Avros
-import static org.apache.crunch.types.avro.Avros.*;
-
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.TupleN;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Map;
-
-public class MyPipeline {
-
-  // Common primitive types
-  PType&lt;Integer&gt; intType = ints();
-  PType&lt;Long&gt; longType = longs();
-  PType&lt;Double&gt; doubleType = doubles();
-  // Bytes are represented by java.nio.ByteBuffer
-  PType&lt;ByteBuffer&gt; bytesType = bytes();
-
-  // A PTableType: using tableOf will return a PTable instead of a
-  // PCollection from a parallelDo call.
-  PTableType&lt;String, Boolean&gt; tableType = tableOf(strings(), booleans());
- 
-  // Pair types: 
-  PType&lt;Pair&lt;String, Boolean&gt;&gt; pairType = pairs(strings(), booleans());

-  PType&lt;Pair&lt;String, Pair&lt;Long, Long&gt;&gt; nestedPairType
= pairs(strings(), pairs(longs(), longs()));
-
-  // A triple
-  PType&lt;Tuple3&lt;Long, Float, Float&gt;&gt; tripType = trips(longs(),
floats(), floats());
-  // An arbitrary length tuple-- note that we lose the generic type information
-  PType&lt;TupleN&gt; tupleType = tupleN(ints(), ints(), floats(), strings(), strings(),
ints());
-
-  // A Collection type
-  PType&lt;Collection&lt;Long&gt;&gt; longsType = collections(longs());
-  // A Map Type-- note that the keys are always strings, we only specify the value.
-  PType&lt;Map&lt;String, Boolean&gt;&gt; mapType = maps(booleans());
-
-  // A Pair of collections
-  PType&lt;Pair&lt;Collection&lt;String&gt;, Collection&lt;Long&gt;&gt;&gt;
pairColType = pairs(
-      collections(strings()),
-      collections(longs()));
-}
-</pre>
+	// Import all of the PType factory methods from Avros
+	import static org.apache.crunch.types.avro.Avros.*;
+	
+	import org.apache.crunch.Pair;
+	import org.apache.crunch.Tuple3;
+	import org.apache.crunch.TupleN;
+	
+	import java.nio.ByteBuffer;
+	import java.util.Collection;
+	import java.util.Map;
+	
+	public class MyPipeline {
+	
+	  // Common primitive types
+	  PType<Integer> intType = ints();
+	  PType<Long> longType = longs();
+	  PType<Double> doubleType = doubles();
+	  // Bytes are represented by java.nio.ByteBuffer
+	  PType<ByteBuffer> bytesType = bytes();
+	
+	  // A PTableType: using tableOf will return a PTable instead of a
+	  // PCollection from a parallelDo call.
+	  PTableType<String, Boolean> tableType = tableOf(strings(), booleans());
+	 
+	  // Pair types: 
+	  PType<Pair<String, Boolean>> pairType = pairs(strings(), booleans()); 
+	  PType<Pair<String, Pair<Long, Long>>> nestedPairType = pairs(strings(),
pairs(longs(), longs()));
+
+	  // A triple
+	  PType<Tuple3<Long, Float, Float>> tripType = trips(longs(), floats(), floats());
+	  // An arbitrary length tuple-- note that we lose the generic type information
+	  PType<TupleN> tupleType = tupleN(ints(), ints(), floats(), strings(), strings(),
ints());
+	
+	  // A Collection type
+	  PType<Collection<Long>> longsType = collections(longs());
+	  // A Map Type-- note that the keys are always strings, we only specify the value.
+	  PType<Map<String, Boolean>> mapType = maps(booleans());
+	
+	  // A Pair of collections
+	  PType<Pair<Collection<String>, Collection<Long>>> pairColType
= pairs(
+	      collections(strings()),
+	      collections(longs()));
+	}
 
 Both type families also have a method named `PType<T> records(Class<T> clazz)`
that can be used to create PTypes that support the common
 record format for each type family. For the WritableTypeFamily, the records method supports
PTypes for implementations of the `Writable`
 interface, and for the AvroTypeFamily, the records method supports PTypes for implementations
of Avro's `IndexedRecord` interface, which
 includes both Avro generic and specific records:
 
-<pre>
-  PType&lt;FooWritable&gt; fwType1 = Writables.records(FooWritable.class);
-  // The more obvious "writables" method also works.
-  PType&lt;FooWritable&gt; fwType = Writables.writables(FooWritable.class);
-
-  // For a generated Avro class, this works:
-  PType&lt;Person&gt; personType1 = Avros.records(Person.class);
-  // As does this:
-  PType&lt;Person&gt; personType2 = Avros.containers(Person.class); 
-  // If you only have a schema, you can create a generic type, like this:
-  org.apache.avro.Schema schema = ...;
-  PType&lt;Record&gt; avroGenericType = Avros.generics(schema);
-</pre>
+	PType<FooWritable> fwType1 = Writables.records(FooWritable.class);
+	// The more obvious "writables" method also works.
+	PType<FooWritable> fwType = Writables.writables(FooWritable.class);
+
+	// For a generated Avro class, this works:
+	PType<Person> personType1 = Avros.records(Person.class);
+	// As does this:
+	PType<Person> personType2 = Avros.containers(Person.class); 
+	// If you only have a schema, you can create a generic type, like this:
+	org.apache.avro.Schema schema = ...;
+	PType<Record> avroGenericType = Avros.generics(schema);
 
 The [Avros](apidocs/0.10.0/org/apache/crunch/types/avro/Avros.html) class also has a `reflects`
method for creating PTypes
 for POJOs using Avro's reflection-based serialization mechanism. There are a couple of restrictions
on the structure of
@@ -558,26 +554,24 @@ the POJO:
 2. All of its fields must be Avro primitive types or collection types that have Avro equivalents,
like `ArrayList` and
 `HashMap<String, T>`. You may also have arrays of Avro primitive types.
 
-<pre>
-  // Declare an inline data type and use it for Crunch serialization
-  public static class UrlData {
-    // The fields don't have to be public, just doing this for the example.
-    double curPageRank;
-    String[] outboundUrls;
-
-    // Remember: you must have a no-arg constructor. 
-    public UrlData() { this(0.0, new String[0]); }
-
-    // The regular constructor
-    public UrlData(double pageRank, String[] outboundUrls) {
-      this.curPageRank = pageRank;
-      this.outboundUrls = outboundUrls;
-    }
-  }
-
-  PType&lt;UrlData&gt; urlDataType = Avros.reflects(UrlData.class);
-  PTableType&lt;String, UrlData&gt; pageRankType = Avros.tableOf(Avros.strings(),
urlDataType);
-</pre>
+	// Declare an inline data type and use it for Crunch serialization
+	public static class UrlData {
+	  // The fields don't have to be public, just doing this for the example.
+	  double curPageRank;
+	  String[] outboundUrls;
+	
+	  // Remember: you must have a no-arg constructor. 
+	  public UrlData() { this(0.0, new String[0]); }
+	
+	  // The regular constructor
+	  public UrlData(double pageRank, String[] outboundUrls) {
+	    this.curPageRank = pageRank;
+	    this.outboundUrls = outboundUrls;
+	  }
+	}
+	
+	PType<UrlData> urlDataType = Avros.reflects(UrlData.class);
+	PTableType<String, UrlData> pageRankType = Avros.tableOf(Avros.strings(), urlDataType);
 
 Avro reflection is a great way to define intermediate types for your Crunch pipelines; not
only is your logic clear
 and easy to test, but the fact that the data is written out as Avro records means that you
can use tools like Hive and Pig



Mime
View raw message