bigtop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rnowl...@apache.org
Subject [11/23] bigtop git commit: BIGTOP-1983. Move BigPetStore data generator to bigtop-data-generators
Date Tue, 25 Aug 2015 13:48:11 GMT
http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java
new file mode 100644
index 0000000..3fdef68
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java
@@ -0,0 +1,315 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.cli;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.bigtop.bigpetstore.datagenerator.DataLoader;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Transaction;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory;
+import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModel;
+import org.apache.commons.lang3.tuple.Pair;
+
+
+/**
+ * Command-line entry point of the BigPetStore data generator.
+ *
+ * Parses the command-line arguments, runs a {@link Simulation} and writes the
+ * generated stores, customers, products, purchasing profiles and transactions
+ * as text files into the requested output directory.
+ */
+public class Driver
+{
+	int nStores;
+	int nCustomers;
+	int nPurchasingModels;
+	double simulationTime;
+	long seed;
+	File outputDir;
+
+	/** Maximum number of command-line arguments; the last one (seed) is optional. */
+	static final int NPARAMS = 6;
+
+	/**
+	 * Prints the command-line usage text to standard output.
+	 */
+	private void printUsage()
+	{
+		String usage = "BigPetStore Data Generator\n" +
+				"\n" +
+				"Usage: java -jar bps-data-generator-v0.2.jar outputDir nStores nCustomers nPurchasingModels simulationLength seed\n" +
+				"\n" +
+				"outputDir - (string) directory to write files\n" +
+				"nStores - (int) number of stores to generate\n" +
+				"nCustomers - (int) number of customers to generate\n" +
+				"nPurchasingModels - (int) number of purchasing models to generate\n" +
+				"simulationLength - (float) number of days to simulate\n" +
+				"seed - (long) seed for RNG. If not given, one is randomly generated.\n";
+
+		System.out.println(usage);
+	}
+
+	/**
+	 * Reports a fatal argument error: prints the message to standard error,
+	 * prints the usage text and terminates the JVM with exit status 1.
+	 *
+	 * @param message error message to print
+	 */
+	private void exitWithUsage(String message)
+	{
+		System.err.println(message);
+		printUsage();
+		System.exit(1);
+	}
+
+	/**
+	 * Parses and validates the command-line arguments and stores them in this
+	 * driver's fields.
+	 *
+	 * Expected order: outputDir nStores nCustomers nPurchasingModels
+	 * simulationLength [seed]. When the seed is omitted a random one is
+	 * generated, as promised by the usage text. Any invalid argument prints an
+	 * error plus the usage text and exits the JVM with status 1.
+	 *
+	 * @param args raw command-line arguments
+	 */
+	public void parseArgs(String[] args)
+	{
+		if(args.length != NPARAMS && args.length != (NPARAMS - 1))
+		{
+			printUsage();
+			System.exit(1);
+		}
+
+		outputDir = new File(args[0]);
+		if(! outputDir.exists())
+		{
+			exitWithUsage("Given path (" + args[0] + ") does not exist.\n");
+		}
+
+		if(! outputDir.isDirectory())
+		{
+			exitWithUsage("Given path (" + args[0] + ") is not a directory.\n");
+		}
+
+		try
+		{
+			nStores = Integer.parseInt(args[1]);
+		}
+		catch(NumberFormatException e)
+		{
+			exitWithUsage("Unable to parse '" + args[1] + "' as an integer for nStores.\n");
+		}
+
+		try
+		{
+			nCustomers = Integer.parseInt(args[2]);
+		}
+		catch(NumberFormatException e)
+		{
+			exitWithUsage("Unable to parse '" + args[2] + "' as an integer for nCustomers.\n");
+		}
+
+		try
+		{
+			nPurchasingModels = Integer.parseInt(args[3]);
+		}
+		catch(NumberFormatException e)
+		{
+			exitWithUsage("Unable to parse '" + args[3] + "' as an integer for nPurchasingModels.\n");
+		}
+
+		try
+		{
+			simulationTime = Double.parseDouble(args[4]);
+		}
+		catch(NumberFormatException e)
+		{
+			exitWithUsage("Unable to parse '" + args[4] + "' as a float for simulationLength.\n");
+		}
+
+		if(args.length == NPARAMS)
+		{
+			try
+			{
+				seed = Long.parseLong(args[5]);
+			}
+			catch(NumberFormatException e)
+			{
+				exitWithUsage("Unable to parse '" + args[5] + "' as a long for the seed.\n");
+			}
+		}
+		else
+		{
+			// The seed is optional: without this branch args[5] was read
+			// unconditionally and a five-argument call crashed with an
+			// ArrayIndexOutOfBoundsException.
+			seed = new java.util.Random().nextLong();
+		}
+	}
+
+	/**
+	 * Opens a buffered output stream for a file inside the output directory
+	 * and prints the file's path to standard output.
+	 *
+	 * @param fileName name of the file to create inside {@code outputDir}
+	 * @return an open stream; the caller is responsible for closing it
+	 * @throws Exception if the file cannot be opened for writing
+	 */
+	private OutputStream openOutputFile(String fileName) throws Exception
+	{
+		File outputFile = new File(outputDir.toString() + File.separator + fileName);
+		System.out.println(outputFile.toString());
+		return new BufferedOutputStream(new FileOutputStream(outputFile));
+	}
+
+	/**
+	 * Writes one comma-separated line per purchased product to
+	 * {@code transactions.txt}: transaction id and time, store id and location,
+	 * customer id, name and location, and finally the product.
+	 *
+	 * @param transactions transactions to write
+	 * @throws Exception if the file cannot be opened or written
+	 */
+	private void writeTransactions(Collection<Transaction> transactions) throws Exception
+	{
+		OutputStream outputStream = openOutputFile("transactions.txt");
+		try
+		{
+			for(Transaction transaction : transactions)
+			{
+				for(Product product : transaction.getProducts())
+				{
+					Pair<String, String> name = transaction.getCustomer().getName();
+					String record = transaction.getId() + ","
+							+ transaction.getDateTime() + ","
+							+ transaction.getStore().getId() + ","
+							+ transaction.getStore().getLocation().getZipcode() + ","
+							+ transaction.getStore().getLocation().getCity() + ","
+							+ transaction.getStore().getLocation().getState() + ","
+							+ transaction.getCustomer().getId() + ","
+							+ name.getLeft() + " " + name.getRight() + ","
+							+ transaction.getCustomer().getLocation().getZipcode() + ","
+							+ transaction.getCustomer().getLocation().getCity() + ","
+							+ transaction.getCustomer().getLocation().getState() + ","
+							+ product.toString() + "\n";
+
+					outputStream.write(record.getBytes());
+				}
+			}
+		}
+		finally
+		{
+			// Close even when a write fails so the file handle is not leaked.
+			outputStream.close();
+		}
+	}
+
+	/**
+	 * Writes one comma-separated line per customer to {@code customers.txt}:
+	 * id, both name parts, zipcode, city and state.
+	 *
+	 * @param customers customers to write
+	 * @throws Exception if the file cannot be opened or written
+	 */
+	private void writeCustomers(Collection<Customer> customers) throws Exception
+	{
+		OutputStream outputStream = openOutputFile("customers.txt");
+		try
+		{
+			for(Customer customer : customers)
+			{
+				Pair<String, String> name = customer.getName();
+				String record = customer.getId() + ","
+						+ name.getLeft() + "," + name.getRight() + ","
+						+ customer.getLocation().getZipcode() + ","
+						+ customer.getLocation().getCity() + ","
+						+ customer.getLocation().getState() + "\n";
+
+				outputStream.write(record.getBytes());
+			}
+		}
+		finally
+		{
+			outputStream.close();
+		}
+	}
+
+	/**
+	 * Writes one comma-separated line per store to {@code stores.txt}:
+	 * id, zipcode, city and state.
+	 *
+	 * @param stores stores to write
+	 * @throws Exception if the file cannot be opened or written
+	 */
+	private void writeStores(Collection<Store> stores) throws Exception
+	{
+		OutputStream outputStream = openOutputFile("stores.txt");
+		try
+		{
+			for(Store store : stores)
+			{
+				String record = store.getId() + ","
+						+ store.getLocation().getZipcode() + ","
+						+ store.getLocation().getCity() + ","
+						+ store.getLocation().getState() + "\n";
+
+				outputStream.write(record.getBytes());
+			}
+		}
+		finally
+		{
+			outputStream.close();
+		}
+	}
+
+	/**
+	 * Writes one line per product to {@code products.txt}, each prefixed with
+	 * the label of the category the product belongs to.
+	 *
+	 * @param productCategories categories whose products are written
+	 * @throws Exception if the file cannot be opened or written
+	 */
+	private void writeProducts(Collection<ProductCategory> productCategories) throws Exception
+	{
+		OutputStream outputStream = openOutputFile("products.txt");
+		try
+		{
+			for(ProductCategory category : productCategories)
+			{
+				for(Product product : category.getProducts())
+				{
+					String record = category.getCategoryLabel() + ","
+							+ product.toString() + "\n";
+
+					outputStream.write(record.getBytes());
+				}
+			}
+		}
+		finally
+		{
+			outputStream.close();
+		}
+	}
+
+	/**
+	 * Writes, for every product category and every purchasing model, the
+	 * string form of the model's profile for that category to
+	 * {@code purchasing_profiles.txt}.
+	 *
+	 * @param productCategories categories to look up in each model
+	 * @param profiles purchasing models to write
+	 * @throws Exception if the file cannot be opened or written
+	 */
+	private void writePurchasingProfiles(List<ProductCategory> productCategories, List<PurchasingModel> profiles) throws Exception
+	{
+		OutputStream outputStream = openOutputFile("purchasing_profiles.txt");
+		try
+		{
+			for(ProductCategory category : productCategories)
+			{
+				for(PurchasingModel model : profiles)
+				{
+					Object productModel = model.getProfile(category.getCategoryLabel());
+					// NOTE(review): records are written back to back with no
+					// separator; presumably toString() ends with its own line
+					// break - confirm against the profile classes.
+					String record = productModel.toString();
+
+					outputStream.write(record.getBytes());
+				}
+			}
+		}
+		finally
+		{
+			outputStream.close();
+		}
+	}
+
+	/**
+	 * Creates a simulation configured with the parsed command-line values.
+	 *
+	 * @param inputData input tables used by the generators
+	 * @return a new, not yet executed simulation
+	 */
+	public Simulation buildSimulation(InputData inputData)
+	{
+		return new Simulation(inputData, nStores, nCustomers, nPurchasingModels, simulationTime, seed);
+	}
+
+	/**
+	 * Runs the simulation on the given input data and writes all result files.
+	 *
+	 * @param inputData input tables used by the generators
+	 * @throws Exception if the simulation or writing a file fails
+	 */
+	private void run(InputData inputData) throws Exception
+	{
+		Simulation simulation = buildSimulation(inputData);
+
+		simulation.simulate();
+
+		writeStores(simulation.getStores());
+		writeCustomers(simulation.getCustomers());
+		writeProducts(simulation.getProductCategories());
+		writePurchasingProfiles(simulation.getProductCategories(), simulation.getPurchasingProfiles());
+		writeTransactions(simulation.getTransactions());
+	}
+
+	/**
+	 * Parses the arguments, loads the input data and runs the generator.
+	 *
+	 * @param args raw command-line arguments
+	 * @throws Exception if loading, simulating or writing fails
+	 */
+	public void run(String[] args) throws Exception
+	{
+		parseArgs(args);
+
+		InputData inputData = (new DataLoader()).loadData();
+
+		run(inputData);
+	}
+
+	/**
+	 * Program entry point.
+	 *
+	 * @param args raw command-line arguments
+	 * @throws Exception if the run fails
+	 */
+	public static void main(String[] args) throws Exception
+	{
+		Driver driver = new Driver();
+		driver.run(args);
+	}
+
+	/** @return the parsed simulation length */
+	public Double getSimulationLength()
+	{
+		return simulationTime;
+	}
+
+	/** @return the parsed number of customers */
+	public int getNCustomers()
+	{
+		return nCustomers;
+	}
+
+	/** @return the seed, either parsed or randomly generated */
+	public long getSeed()
+	{
+		return seed;
+	}
+
+	/** @return the parsed number of stores */
+	public int getNStores()
+	{
+		return nStores;
+	}
+
+	/** @return the output directory given on the command line */
+	public File getOutputDir()
+	{
+		return outputDir;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java
new file mode 100644
index 0000000..4b9b500
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.cli;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Vector;
+
+import org.apache.bigtop.bigpetstore.datagenerator.Constants;
+import org.apache.bigtop.bigpetstore.datagenerator.CustomerGenerator;
+import org.apache.bigtop.bigpetstore.datagenerator.ProductGenerator;
+import org.apache.bigtop.bigpetstore.datagenerator.PurchasingModelGenerator;
+import org.apache.bigtop.bigpetstore.datagenerator.StoreGenerator;
+import org.apache.bigtop.bigpetstore.datagenerator.TransactionGenerator;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Transaction;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModel;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Runs one BigPetStore simulation: generates stores, customers, products,
+ * purchasing profiles and finally the transactions of every customer up to
+ * the configured simulation time.
+ *
+ * The generate methods depend on each other's results; {@link #simulate()}
+ * calls them in the required order.
+ */
+public class Simulation
+{
+	InputData inputData;
+	SeedFactory seedFactory;
+	int nStores;
+	int nCustomers;
+	int nPurchasingModels;
+	double simulationTime;
+
+	List<Store> stores;
+	List<Customer> customers;
+	Sampler<PurchasingModel> purchasingModelSampler;
+	List<PurchasingModel> purchasingProfiles;
+	List<Transaction> transactions;
+	List<ProductCategory> productCategories;
+
+	/**
+	 * Creates a simulation; nothing is generated until {@link #simulate()} or
+	 * the individual generate methods are called.
+	 *
+	 * @param inputData input tables handed to the store and customer generators
+	 * @param nStores number of stores to generate
+	 * @param nCustomers number of customers to generate
+	 * @param nPurchasingModels number of purchasing models to generate
+	 * @param simulationTime transactions dated after this time are discarded
+	 * @param seed seed for the {@link SeedFactory} shared by all generators
+	 * @throws IllegalArgumentException if {@code simulationTime} is NaN or infinite
+	 */
+	public Simulation(InputData inputData, int nStores, int nCustomers, int nPurchasingModels, double simulationTime, long seed)
+	{
+		// generateTransactions() only stops once a transaction is dated after
+		// simulationTime; that comparison is never true for NaN or +Infinity,
+		// so such values would make the simulation run forever.
+		if(Double.isNaN(simulationTime) || Double.isInfinite(simulationTime))
+		{
+			throw new IllegalArgumentException("simulationTime must be a finite number but was " + simulationTime);
+		}
+
+		this.inputData = inputData;
+		this.nStores = nStores;
+		this.nCustomers = nCustomers;
+		this.nPurchasingModels = nPurchasingModels;
+		this.simulationTime = simulationTime;
+		seedFactory = new SeedFactory(seed);
+	}
+
+	/**
+	 * Generates {@code nStores} stores and publishes them as an unmodifiable list.
+	 *
+	 * @throws Exception if the store generator fails
+	 */
+	public void generateStores() throws Exception
+	{
+		System.out.println("Generating stores");
+		StoreGenerator storeGenerator = new StoreGenerator(inputData, seedFactory);
+
+		List<Store> generated = Lists.newArrayList();
+		for(int i = 0; i < nStores; i++)
+		{
+			generated.add(storeGenerator.generate());
+		}
+
+		stores = Collections.unmodifiableList(generated);
+
+		System.out.println("Generated " + stores.size() + " stores");
+	}
+
+	/**
+	 * Generates {@code nCustomers} customers and publishes them as an
+	 * unmodifiable list. Uses the stores, so {@link #generateStores()} must
+	 * have run first.
+	 *
+	 * @throws Exception if the customer generator fails
+	 */
+	public void generateCustomers() throws Exception
+	{
+		System.out.println("Generating customers");
+		CustomerGenerator generator = new CustomerGenerator(inputData, stores, seedFactory);
+
+		List<Customer> generated = Lists.newArrayList();
+		for(int i = 0; i < nCustomers; i++)
+		{
+			generated.add(generator.generate());
+		}
+
+		customers = Collections.unmodifiableList(generated);
+
+		System.out.println("Generated " + customers.size() + " customers");
+	}
+
+	/**
+	 * Generates the product categories from {@code Constants.PRODUCTS_COLLECTION}.
+	 */
+	public void generateProducts()
+	{
+		System.out.println("Generating products");
+		ProductGenerator generator = new ProductGenerator(Constants.PRODUCTS_COLLECTION);
+		productCategories = generator.generate();
+	}
+
+	/**
+	 * Generates {@code nPurchasingModels} purchasing models and a uniform
+	 * sampler over them. Uses the product categories, so
+	 * {@link #generateProducts()} must have run first.
+	 *
+	 * @throws Exception if the purchasing model generator fails
+	 */
+	public void generatePurchasingProfiles() throws Exception
+	{
+		System.out.println("Generating purchasing profiles");
+		PurchasingModelGenerator generator = new PurchasingModelGenerator(productCategories, seedFactory);
+
+		purchasingProfiles = Lists.newArrayList();
+		for(int i = 0; i < nPurchasingModels; i++)
+		{
+			purchasingProfiles.add(generator.generate());
+		}
+
+		System.out.println("Generated " + purchasingProfiles.size() + " purchasing profiles");
+
+		purchasingModelSampler = RouletteWheelSampler.createUniform(purchasingProfiles, seedFactory);
+	}
+
+	/**
+	 * Generates the transactions of every customer. Each customer gets a
+	 * sampled purchasing model and keeps producing transactions until one is
+	 * dated after the simulation time; that last transaction is discarded.
+	 * Requires customers, product categories and purchasing profiles.
+	 *
+	 * @throws Exception if sampling or the transaction generator fails
+	 */
+	public void generateTransactions() throws Exception
+	{
+		System.out.println("Generating transactions");
+		transactions = Lists.newArrayList();
+
+		for(Customer customer : customers)
+		{
+			PurchasingModel profile = purchasingModelSampler.sample();
+
+			TransactionGenerator generator = new TransactionGenerator(customer,
+					profile, productCategories, seedFactory);
+
+			while(true)
+			{
+				Transaction transaction = generator.generate();
+
+				// First transaction past the end of the simulation ends this customer.
+				if(transaction.getDateTime() > simulationTime)
+				{
+					break;
+				}
+				transactions.add(transaction);
+			}
+		}
+
+		System.out.println("Generated " + transactions.size() + " transactions");
+	}
+
+	/**
+	 * Runs every generation step in dependency order.
+	 *
+	 * @throws Exception if any generation step fails
+	 */
+	public void simulate() throws Exception
+	{
+		generateStores();
+		generateCustomers();
+		generateProducts();
+		generatePurchasingProfiles();
+		generateTransactions();
+	}
+
+	/** @return the generated stores, or null before {@link #generateStores()} */
+	public List<Store> getStores()
+	{
+		return stores;
+	}
+
+	/** @return the generated customers, or null before {@link #generateCustomers()} */
+	public List<Customer> getCustomers()
+	{
+		return customers;
+	}
+
+	/** @return the generated transactions, or null before {@link #generateTransactions()} */
+	public List<Transaction> getTransactions()
+	{
+		return transactions;
+	}
+
+	/** @return the input data this simulation was created with */
+	public InputData getInputData()
+	{
+		return inputData;
+	}
+
+	/** @return the generated product categories, or null before {@link #generateProducts()} */
+	public List<ProductCategory> getProductCategories()
+	{
+		return this.productCategories;
+	}
+
+	/** @return the generated purchasing models, or null before {@link #generatePurchasingProfiles()} */
+	public List<PurchasingModel> getPurchasingProfiles()
+	{
+		return this.purchasingProfiles;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java
new file mode 100644
index 0000000..704c965
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels;
+
+import java.io.Serializable;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.commons.lang3.tuple.Pair;
+
+/**
+ * Serializable value holder for one generated customer: a numeric id, a
+ * two-part name, a location and an associated store.
+ */
+public class Customer implements Serializable
+{
+	private static final long serialVersionUID = 5739806281335931258L;
+
+	int id;
+	// presumably (first name, last name) - confirm against the generator
+	Pair<String, String> name;
+	ZipcodeRecord location;
+	Store store;
+
+	/**
+	 * Creates a customer from its four attributes.
+	 *
+	 * @param id customer id
+	 * @param name two-part customer name
+	 * @param store store associated with the customer
+	 * @param location zipcode record of the customer
+	 */
+	public Customer(int id, Pair<String, String> name, Store store, ZipcodeRecord location)
+	{
+		this.store = store;
+		this.location = location;
+		this.name = name;
+		this.id = id;
+	}
+
+	/** @return the customer id */
+	public int getId()
+	{
+		return this.id;
+	}
+
+	/** @return the two-part customer name */
+	public Pair<String, String> getName()
+	{
+		return this.name;
+	}
+
+	/** @return the zipcode record of the customer */
+	public ZipcodeRecord getLocation()
+	{
+		return this.location;
+	}
+
+	/** @return the store associated with the customer */
+	public Store getStore()
+	{
+		return this.store;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java
new file mode 100644
index 0000000..d4bab79
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels;
+
+/**
+ * Pet species known to the data generator.
+ */
+public enum PetSpecies
+{
+	DOG, CAT
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Product.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Product.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Product.java
new file mode 100644
index 0000000..1d4e074
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Product.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * Serializable, immutable product described by a map from field name to
+ * field value.
+ */
+public class Product implements Serializable
+{
+	private static final long serialVersionUID = 4519472063058037956L;
+
+	ImmutableMap<String, Object> fields;
+
+	/**
+	 * Creates a product from a snapshot of the given fields.
+	 *
+	 * @param fields field names mapped to their values; copied, so later
+	 *               changes to the argument do not affect this product
+	 */
+	public Product(Map<String, Object> fields)
+	{
+		this.fields = ImmutableMap.copyOf(fields);
+	}
+
+	/** @return the names of all fields of this product */
+	public ImmutableSet<String> getFieldNames()
+	{
+		return fields.keySet();
+	}
+
+	/**
+	 * @param fieldName name of the field to look up
+	 * @return the field's value, or null if the product has no such field
+	 */
+	public Object getFieldValue(String fieldName)
+	{
+		return fields.get(fieldName);
+	}
+
+	/**
+	 * @param fieldName name of the field to look up
+	 * @return the field's value converted with {@code toString()}, or null if
+	 *         the product has no such field (previously this case threw a
+	 *         NullPointerException, unlike the other typed getters)
+	 */
+	public String getFieldValueAsString(String fieldName)
+	{
+		Object value = getFieldValue(fieldName);
+		return value == null ? null : value.toString();
+	}
+
+	/**
+	 * @param fieldName name of the field to look up
+	 * @return the field's value if it is a {@link Double}; null if the field
+	 *         is missing or holds another type
+	 */
+	public Double getFieldValueAsDouble(String fieldName)
+	{
+		Object value = getFieldValue(fieldName);
+		if(value instanceof Double)
+		{
+			return (Double) value;
+		}
+
+		return null;
+	}
+
+	/**
+	 * @param fieldName name of the field to look up
+	 * @return the field's value if it is a {@link Long}, or widened to a Long
+	 *         if it is an {@link Integer}; null if the field is missing or
+	 *         holds another type
+	 */
+	public Long getFieldValueAsLong(String fieldName)
+	{
+		Object value = getFieldValue(fieldName);
+		if(value instanceof Long)
+		{
+			return (Long) value;
+		}
+
+		if(value instanceof Integer)
+		{
+			return Long.valueOf(((Integer) value).longValue());
+		}
+
+		return null;
+	}
+
+	/**
+	 * @return all fields rendered as {@code name=value;} pairs, concatenated
+	 *         in the map's iteration order
+	 */
+	@Override
+	public String toString()
+	{
+		StringBuilder str = new StringBuilder();
+		for(Map.Entry<String, Object> entry : fields.entrySet())
+		{
+			str.append(entry.getKey()).append("=").append(entry.getValue()).append(";");
+		}
+
+		return str.toString();
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java
new file mode 100644
index 0000000..50e333a
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels;
+
+import java.io.Serializable;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+
+/**
+ * Serializable value holder for one generated store: a numeric id, a name
+ * and a location.
+ */
+public class Store implements Serializable
+{
+	private static final long serialVersionUID = 2347066623022747969L;
+
+	int id;
+	String name;
+	ZipcodeRecord location;
+
+	/**
+	 * Creates a store from its three attributes.
+	 *
+	 * @param id store id
+	 * @param name store name
+	 * @param location zipcode record of the store
+	 */
+	public Store(int id, String name, ZipcodeRecord location)
+	{
+		this.location = location;
+		this.name = name;
+		this.id = id;
+	}
+
+	/** @return the store id */
+	public int getId()
+	{
+		return this.id;
+	}
+
+	/** @return the store name */
+	public String getName()
+	{
+		return this.name;
+	}
+
+	/** @return the zipcode record of the store */
+	public ZipcodeRecord getLocation()
+	{
+		return this.location;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java
new file mode 100644
index 0000000..6a78e44
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Serializable, immutable record of one purchase: which customer bought
+ * which products at which store, and when.
+ */
+public class Transaction implements Serializable
+{
+	private static final long serialVersionUID = 103133601154354349L;
+
+	final int id;
+	final Customer customer;
+	final Store store;
+	// NOTE(review): unit is not visible here; presumably days since the
+	// start of the simulation - confirm against the transaction generator
+	final Double dateTime;
+	final ImmutableList<Product> products;
+
+	/**
+	 * Creates a transaction; the product list is copied into an immutable list.
+	 *
+	 * @param id transaction id
+	 * @param customer customer who made the purchase
+	 * @param store store where the purchase was made
+	 * @param dateTime time of the purchase
+	 * @param products purchased products
+	 */
+	public Transaction(int id, Customer customer, Store store, Double dateTime, List<Product> products)
+	{
+		this.products = ImmutableList.copyOf(products);
+		this.dateTime = dateTime;
+		this.store = store;
+		this.customer = customer;
+		this.id = id;
+	}
+
+	/** @return the transaction id */
+	public int getId()
+	{
+		return this.id;
+	}
+
+	/** @return the customer who made the purchase */
+	public Customer getCustomer()
+	{
+		return this.customer;
+	}
+
+	/** @return the store where the purchase was made */
+	public Store getStore()
+	{
+		return this.store;
+	}
+
+	/** @return the time of the purchase */
+	public Double getDateTime()
+	{
+		return this.dateTime;
+	}
+
+	/** @return the purchased products as an immutable list */
+	public ImmutableList<Product> getProducts()
+	{
+		return this.products;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java
new file mode 100644
index 0000000..4fad219
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Serializable bundle of the input tables used by the generators: the
+ * zipcode table and the name tables.
+ */
+public class InputData implements Serializable
+{
+	private static final long serialVersionUID = 9078989799806707788L;
+
+	List<ZipcodeRecord> zipcodeTable;
+	Names names;
+
+	/**
+	 * Creates the bundle. The zipcode table is wrapped in an unmodifiable
+	 * view, not copied, so changes to the argument list remain visible.
+	 *
+	 * @param zipcodeTable zipcode records
+	 * @param names name tables
+	 */
+	public InputData(List<ZipcodeRecord> zipcodeTable, Names names)
+	{
+		List<ZipcodeRecord> readOnlyTable = Collections.unmodifiableList(zipcodeTable);
+		this.zipcodeTable = readOnlyTable;
+		this.names = names;
+	}
+
+	/** @return the unmodifiable view of the zipcode records */
+	public List<ZipcodeRecord> getZipcodeTable()
+	{
+		return this.zipcodeTable;
+	}
+
+	/** @return the name tables */
+	public Names getNames()
+	{
+		return this.names;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java
new file mode 100644
index 0000000..9402e02
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableMap;
+
+/** Serializable holder for the weighted first-name and last-name tables. */
+public class Names implements Serializable {
+	private static final long serialVersionUID = 2731634747628534453L;
+	final ImmutableMap<String, Double> firstNames;
+	final ImmutableMap<String, Double> lastNames;
+
+	/** Takes immutable snapshots of both maps, so later changes to the arguments are not seen here. */
+	public Names(Map<String, Double> firstNames, Map<String, Double> lastNames) {
+		ImmutableMap<String, Double> firstSnapshot = ImmutableMap.copyOf(firstNames);
+		ImmutableMap<String, Double> lastSnapshot = ImmutableMap.copyOf(lastNames);
+		this.firstNames = firstSnapshot;
+		this.lastNames = lastSnapshot;
+	}
+
+	/** Returns the immutable first-name map. */
+	public ImmutableMap<String, Double> getFirstNames() {
+		return firstNames;
+	}
+
+	/** Returns the immutable last-name map. */
+	public ImmutableMap<String, Double> getLastNames() {
+		return lastNames;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java
new file mode 100644
index 0000000..6fb0572
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.PetSpecies;
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+
+/** One product category: label, applicable species, field names, usage and trigger rates, and products. */
+public class ProductCategory implements Serializable {
+	private static final long serialVersionUID = -7638076590334497836L;
+	String categoryLabel;
+	ImmutableSet<PetSpecies> applicableSpecies;
+	ImmutableSet<String> fieldNames;
+	boolean triggerTransaction;
+	double dailyUsageRate;
+	double amountUsedPerPetAverage;
+	double amountUsedPerPetVariance;
+	double triggerTransactionRate;
+	double triggerPurchaseRate;
+	ImmutableList<Product> products;
+
+	/** Stores the scalar arguments as given and copies the three collections into immutable ones. */
+	public ProductCategory(String categoryLabel, Set<PetSpecies> species, Set<String> fieldNames,
+			boolean triggerTransaction, double dailyUsageRate, double amountUsedPerPetAverage,
+			double amountUsedPerPetVariance, double triggerTransactionRate,
+			double triggerPurchaseRate, List<Product> products) {
+		this.categoryLabel = categoryLabel;
+		this.triggerTransaction = triggerTransaction;
+		this.dailyUsageRate = dailyUsageRate;
+		this.amountUsedPerPetAverage = amountUsedPerPetAverage;
+		this.amountUsedPerPetVariance = amountUsedPerPetVariance;
+		this.triggerTransactionRate = triggerTransactionRate;
+		this.triggerPurchaseRate = triggerPurchaseRate;
+		this.applicableSpecies = ImmutableSet.copyOf(species);
+		this.fieldNames = ImmutableSet.copyOf(fieldNames);
+		this.products = ImmutableList.copyOf(products);
+	}
+
+	/** Returns the category label. */
+	public String getCategoryLabel() {
+		return categoryLabel;
+	}
+
+	/** Returns the immutable set of species this category applies to. */
+	public ImmutableSet<PetSpecies> getApplicableSpecies() {
+		return applicableSpecies;
+	}
+
+	/** Returns the immutable set of field names. */
+	public ImmutableSet<String> getFieldNames() {
+		return fieldNames;
+	}
+
+	/** Returns the {@code triggerTransaction} flag, boxed. */
+	public Boolean getTriggerTransaction() {
+		return triggerTransaction;
+	}
+
+	/** Returns the daily usage rate, boxed. */
+	public Double getDailyUsageRate() {
+		return dailyUsageRate;
+	}
+
+	/** Returns the {@code amountUsedPerPetAverage} value, boxed. */
+	public Double getBaseAmountUsedAverage() {
+		return amountUsedPerPetAverage;
+	}
+
+	/** Returns the {@code amountUsedPerPetVariance} value, boxed. */
+	public Double getBaseAmountUsedVariance() {
+		return amountUsedPerPetVariance;
+	}
+
+	/** Returns the {@code triggerTransactionRate} value, boxed. */
+	public Double getTransactionTriggerRate() {
+		return triggerTransactionRate;
+	}
+
+	/** Returns the {@code triggerPurchaseRate} value, boxed. */
+	public Double getPurchaseTriggerRate() {
+		return triggerPurchaseRate;
+	}
+
+	/** Returns the immutable list of products in this category. */
+	public ImmutableList<Product> getProducts() {
+		return products;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java
new file mode 100644
index 0000000..ef54712
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs;
+
+import java.io.Serializable;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+/** Immutable zipcode record; {@code coordinates} holds (latitude, longitude) in degrees. */
+public class ZipcodeRecord implements Serializable {
+	private static final long serialVersionUID = 1769986686070108470L;
+	final String zipcode;
+	final Pair<Double, Double> coordinates;
+	final String city;
+	final String state;
+	final double medianHouseholdIncome;
+	final long population;
+
+	public ZipcodeRecord(String zipcode, Pair<Double, Double> coordinates,
+			String city, String state, double medianHouseholdIncome, long population) {
+		this.zipcode = zipcode;
+		this.coordinates = coordinates;
+		this.city = city;
+		this.state = state;
+		this.medianHouseholdIncome = medianHouseholdIncome;
+		this.population = population;
+	}
+
+	/** Returns the zipcode string. */
+	public String getZipcode() {
+		return zipcode;
+	}
+
+	/** Returns the (latitude, longitude) pair. */
+	public Pair<Double, Double> getCoordinates() {
+		return coordinates;
+	}
+
+	/** Returns the median household income. */
+	public double getMedianHouseholdIncome() {
+		return medianHouseholdIncome;
+	}
+
+	/** Returns the population. */
+	public long getPopulation() {
+		return population;
+	}
+
+	/**
+	 * Great-circle distance to {@code other} (spherical law of cosines) times 69.09 per
+	 * degree of arc -- presumably statute miles, confirm. Equal zipcodes give 0.0.
+	 */
+	public double distance(ZipcodeRecord other) {
+		if (other.getZipcode().equals(zipcode))
+			return 0.0;
+		Pair<Double, Double> otherCoords = other.getCoordinates();
+		double lat = Math.toRadians(coordinates.getLeft());
+		double otherLat = Math.toRadians(otherCoords.getLeft());
+		double deltaLon = Math.toRadians(coordinates.getRight() - otherCoords.getRight());
+		double cosAngle = Math.sin(lat) * Math.sin(otherLat)
+				+ Math.cos(lat) * Math.cos(otherLat) * Math.cos(deltaLon);
+		// Rounding can leave the cosine just outside [-1, 1] (e.g. two zipcodes sharing
+		// coordinates); Math.acos would then return NaN, so clamp first.
+		cosAngle = Math.max(-1.0, Math.min(1.0, cosAngle));
+		return Math.toDegrees(Math.acos(cosAngle)) * 69.09;
+	}
+
+	/** Returns the city name. */
+	public String getCity() {
+		return city;
+	}
+
+	/** Returns the state. */
+	public String getState() {
+		return state;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java
new file mode 100644
index 0000000..5a847ea
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datareaders;
+
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.Map;
+import java.util.Scanner;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.Names;
+
+import com.google.common.collect.Maps;
+
+/** Reads weighted first and last names from comma-separated text. */
+public class NameReader {
+	InputStream path;
+
+	public NameReader(InputStream path) {
+		this.path = path;
+	}
+
+	/**
+	 * Parses each non-blank line: column 0 is the name and column 5 its weight; a "1" in
+	 * column 4 marks a first name and a "1" in column 3 a last name. The scanner (and with
+	 * it the stream) is closed even when a line fails to parse.
+	 */
+	public Names readData() throws FileNotFoundException {
+		Map<String, Double> firstNames = Maps.newHashMap();
+		Map<String, Double> lastNames = Maps.newHashMap();
+		Scanner scanner = new Scanner(path);
+		try {
+			while (scanner.hasNextLine()) {
+				String line = scanner.nextLine().trim();
+				if (line.isEmpty())
+					continue; // tolerate blank lines instead of failing on cols[5]
+				String[] cols = line.split(",");
+				String name = cols[0];
+				double weight = Double.parseDouble(cols[5]);
+				if (cols[4].equals("1"))
+					firstNames.put(name, weight);
+				if (cols[3].equals("1"))
+					lastNames.put(name, weight);
+			}
+		} finally {
+			scanner.close();
+		}
+		return new Names(firstNames, lastNames);
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
new file mode 100644
index 0000000..1612a92
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.datareaders;
+
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.Vector;
+
+import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord;
+import org.apache.commons.lang3.tuple.Pair;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+/**
+ * Builds {@link ZipcodeRecord}s by joining three per-zipcode inputs: median
+ * household income, population, and coordinates with city and state. Only
+ * zipcodes present in all three inputs are kept.
+ */
+public class ZipcodeReader {
+	/** Location columns parsed from one row of the coordinates input. */
+	private static class ZipcodeLocationRecord {
+		public final Pair<Double, Double> coordinates;
+		public final String state;
+		public final String city;
+		public ZipcodeLocationRecord(Pair<Double, Double> coordinates,
+				String city, String state) {
+			this.coordinates = coordinates;
+			this.city = city;
+			this.state = state;
+		}
+	}
+
+	InputStream zipcodeIncomesFile = null;
+	InputStream zipcodePopulationFile = null;
+	InputStream zipcodeCoordinatesFile = null;
+
+	/** Sets the stream the income table is read from. */
+	public void setIncomesFile(InputStream path) {
+		this.zipcodeIncomesFile = path;
+	}
+
+	/** Sets the stream the population table is read from. */
+	public void setPopulationFile(InputStream path) {
+		this.zipcodePopulationFile = path;
+	}
+
+	/** Sets the stream the coordinates table is read from. */
+	public void setCoordinatesFile(InputStream path) {
+		this.zipcodeCoordinatesFile = path;
+	}
+
+	/**
+	 * Reads zipcode -> median household income. Two header lines are skipped;
+	 * the zipcode is the second word of column 2 and the income is column 5.
+	 * Blank lines and rows whose income is not an integer are left out. The
+	 * scanner is closed even if parsing fails.
+	 */
+	private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException {
+		Map<String, Double> entries = Maps.newHashMap();
+		Scanner scanner = new Scanner(path);
+		try {
+			// skip headers
+			scanner.nextLine();
+			scanner.nextLine();
+			while (scanner.hasNextLine()) {
+				String line = scanner.nextLine().trim();
+				if (line.isEmpty())
+					continue;
+
+				String[] cols = line.split(",");
+				// zipcodes are in the form "ZCTA5 XXXXX"
+				String zipcode = cols[2].split(" ")[1].trim();
+				try {
+					double medianHouseholdIncome = Integer.parseInt(cols[5].trim());
+					entries.put(zipcode, medianHouseholdIncome);
+				} catch (NumberFormatException ignored) {
+					// Deliberately skipped: the row has no usable integer income.
+				}
+			}
+		} finally {
+			scanner.close();
+		}
+		return ImmutableMap.copyOf(entries);
+	}
+
+	/**
+	 * Reads zipcode -> population (column 0 -> column 1) after one header line.
+	 * Blank lines are skipped; when a zipcode repeats, the largest population
+	 * wins. The scanner is closed even if parsing fails.
+	 */
+	private ImmutableMap<String, Long> readPopulationData(InputStream path) throws FileNotFoundException {
+		Map<String, Long> entries = Maps.newHashMap();
+		Scanner scanner = new Scanner(path);
+		try {
+			// skip header
+			scanner.nextLine();
+			while (scanner.hasNextLine()) {
+				String line = scanner.nextLine().trim();
+				if (line.isEmpty())
+					continue;
+
+				String[] cols = line.split(",");
+				String zipcode = cols[0].trim();
+				long population = Long.parseLong(cols[1].trim());
+
+				Long previous = entries.get(zipcode);
+				if (previous == null || population > previous)
+					entries.put(zipcode, population);
+			}
+		} finally {
+			scanner.close();
+		}
+		return ImmutableMap.copyOf(entries);
+	}
+
+	/**
+	 * Reads zipcode -> location after one header line. Fields are separated by
+	 * ", " and each is wrapped in quote marks: zipcode, state, latitude,
+	 * longitude, city. Blank lines are skipped; the scanner is always closed.
+	 */
+	private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(InputStream path) throws FileNotFoundException {
+		Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap();
+		Scanner scanner = new Scanner(path);
+		try {
+			// skip header
+			scanner.nextLine();
+			while (scanner.hasNextLine()) {
+				String line = scanner.nextLine().trim();
+				if (line.isEmpty())
+					continue;
+
+				String[] cols = line.split(", ");
+				String zipcode = unquote(cols[0]);
+				String state = unquote(cols[1]);
+				Double latitude = Double.parseDouble(unquote(cols[2]));
+				Double longitude = Double.parseDouble(unquote(cols[3]));
+				String city = unquote(cols[4]);
+
+				Pair<Double, Double> coords = Pair.of(latitude, longitude);
+				entries.put(zipcode, new ZipcodeLocationRecord(coords, city, state));
+			}
+		} finally {
+			scanner.close();
+		}
+		return ImmutableMap.copyOf(entries);
+	}
+
+	/** Drops the first and last character (the surrounding quote marks) of a field. */
+	private static String unquote(String field) {
+		return field.substring(1, field.length() - 1);
+	}
+
+	/**
+	 * Reads the three configured streams and returns one record per zipcode present in all of them.
+	 */
+	public ImmutableList<ZipcodeRecord> readData() throws FileNotFoundException {
+		ImmutableMap<String, Double> incomes = readIncomeData(this.zipcodeIncomesFile);
+		ImmutableMap<String, Long> populations = readPopulationData(this.zipcodePopulationFile);
+		ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(this.zipcodeCoordinatesFile);
+
+		Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet());
+		zipcodeSubset.retainAll(populations.keySet());
+		zipcodeSubset.retainAll(coordinates.keySet());
+
+		List<ZipcodeRecord> table = new Vector<ZipcodeRecord>();
+		for (String zipcode : zipcodeSubset) {
+			ZipcodeLocationRecord location = coordinates.get(zipcode);
+			table.add(new ZipcodeRecord(zipcode, location.coordinates, location.city,
+					location.state, incomes.get(zipcode), populations.get(zipcode)));
+		}
+		return ImmutableList.copyOf(table);
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
new file mode 100644
index 0000000..aea004e
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework;
+
+import java.util.Random;
+
+/** Hands out a stream of seeds drawn from one underlying {@link Random}. */
+public class SeedFactory {
+	Random rng;
+
+	/** Creates a factory backed by a default-seeded {@link Random}. */
+	public SeedFactory() {
+		this.rng = new Random();
+	}
+
+	/** Creates a factory whose seed sequence is determined by {@code seed}. */
+	public SeedFactory(long seed) {
+		this.rng = new Random(seed);
+	}
+
+	/** Returns the next {@code long} drawn from the underlying generator. */
+	public long getNextSeed() {
+		return this.rng.nextLong();
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
new file mode 100644
index 0000000..0b90e2b
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import java.io.Serializable;
+import java.util.Map;
+
+/**
+ * Holder for a Markov model's start weights and per-state transition weights; maps are stored as given.
+ */
+public class MarkovModel<T> implements Serializable {
+	private static final long serialVersionUID = 8378109656005603192L;
+
+	final Map<T, Map<T, Double>> transitionWeights;
+	final Map<T, Double> startWeights;
+
+	public MarkovModel(Map<T, Map<T, Double>> transitionWeights, Map<T, Double> startWeights) {
+		this.transitionWeights = transitionWeights;
+		this.startWeights = startWeights;
+	}
+
+	/** Returns the transition weights: an outer map of states to inner maps of state weights. */
+	public Map<T, Map<T, Double>> getTransitionWeights() {
+		return transitionWeights;
+	}
+
+	/** Returns the start weight of each state. */
+	public Map<T, Double> getStartWeights() {
+		return startWeights;
+	}
+
+	/** Renders as {@code MarkovModel(<startWeights>,<transitionWeights>)}, matching the class name. */
+	@Override
+	public String toString() {
+		return "MarkovModel(" + startWeights + "," + transitionWeights + ")";
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
new file mode 100644
index 0000000..861c0ef
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableTable;
+
+/**
+ * Incrementally collects start weights and transition weights and turns them into a {@link MarkovModel}.
+ */
+public class MarkovModelBuilder<S> {
+	ImmutableTable.Builder<S, S, Double> transitionWeights;
+	ImmutableMap.Builder<S, Double> startWeights;
+
+	public MarkovModelBuilder() {
+		this.startWeights = ImmutableMap.builder();
+		this.transitionWeights = ImmutableTable.builder();
+	}
+
+	/** Static factory equivalent to {@code new MarkovModelBuilder<T>()}. */
+	public static <T> MarkovModelBuilder<T> create() {
+		return new MarkovModelBuilder<T>();
+	}
+
+	/** Records {@code weight} as the start weight of {@code state}. */
+	public void addStartState(S state, double weight) {
+		this.startWeights.put(state, weight);
+	}
+
+	/** Records {@code weight} in the transition table at row {@code state1}, column {@code state2}. */
+	public void addTransition(S state1, S state2, double weight) {
+		this.transitionWeights.put(state1, state2, weight);
+	}
+
+	/** Builds a model from the transition table's row map and the start-weight map. */
+	public MarkovModel<S> build() {
+		return new MarkovModel<S>(this.transitionWeights.build().rowMap(),
+				this.startWeights.build());
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
new file mode 100644
index 0000000..d0bd6c5
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels;
+
+import java.util.Map;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * Stateful sampler that walks a {@link MarkovModel}: the first sample is drawn
+ * from the start-state weights, every later one from the transition weights of
+ * the previously returned state.
+ */
+public class MarkovProcess<T> implements Sampler<T> {
+	final ImmutableMap<T, Sampler<T>> transitionSamplers;
+	final Sampler<T> startStateSampler;
+
+	T currentState;
+
+	/**
+	 * Creates one {@link RouletteWheelSampler} for the start weights and then one
+	 * per entry of the model's transition weights, all using {@code factory}.
+	 */
+	public MarkovProcess(MarkovModel<T> model, SeedFactory factory) {
+		this.currentState = null;
+		this.startStateSampler = RouletteWheelSampler.create(model.getStartWeights(), factory);
+
+		ImmutableMap.Builder<T, Sampler<T>> samplersByState = ImmutableMap.builder();
+		for (Map.Entry<T, Map<T, Double>> row : model.getTransitionWeights().entrySet()) {
+			Sampler<T> rowSampler = RouletteWheelSampler.create(row.getValue(), factory);
+			samplersByState.put(row.getKey(), rowSampler);
+		}
+		this.transitionSamplers = samplersByState.build();
+	}
+
+	/** Static factory equivalent to the constructor. */
+	public static <T> MarkovProcess<T> create(MarkovModel<T> model, SeedFactory factory) {
+		return new MarkovProcess<T>(model, factory);
+	}
+
+	/**
+	 * Advances the process by one step and returns the state it lands in.
+	 */
+	public T sample() throws Exception {
+		Sampler<T> nextStateSampler = (currentState == null)
+				? startStateSampler
+				: transitionSamplers.get(currentState);
+		currentState = nextStateSampler.sample();
+		return currentState;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
new file mode 100644
index 0000000..5161761
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/** Maps a datum of type {@code T} and the datum of type {@code S} it is conditioned on to a probability value. */
+public interface ConditionalProbabilityDensityFunction<T, S> {
+	double probability(T datum, S conditionalDatum);
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
new file mode 100644
index 0000000..dcc1278
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ExponentialPDF.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Density of the exponential distribution with rate {@code lambda}:
+ * {@code lambda * exp(-lambda * x)} for {@code x >= 0} and zero for
+ * negative {@code x}.
+ */
+public class ExponentialPDF implements ProbabilityDensityFunction<Double>
+{
+	private final double lambda;
+
+	/**
+	 * @param lambda rate parameter of the distribution; it is stored as
+	 *        given and not validated
+	 */
+	public ExponentialPDF(double lambda)
+	{
+		this.lambda = lambda;
+	}
+
+	/**
+	 * Evaluates the density at the given point.
+	 *
+	 * @param value point at which to evaluate the density; must not be null
+	 * @return {@code lambda * exp(-lambda * value)} for non-negative values,
+	 *         {@code 0.0} for negative values
+	 */
+	public double probability(Double value)
+	{
+		// The exponential distribution has no mass below zero. Without this
+		// guard a negative input would yield a density larger than lambda.
+		if(value < 0.0)
+		{
+			return 0.0;
+		}
+
+		return lambda * Math.exp(-1.0 * value * lambda);
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
new file mode 100644
index 0000000..55ebc93
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Density of a normal (Gaussian) distribution described by its mean and
+ * standard deviation.
+ */
+public class GaussianPDF implements ProbabilityDensityFunction<Double>
+{
+	private final double mean;
+	private final double std;
+
+	/**
+	 * @param mean center of the distribution
+	 * @param std standard deviation of the distribution
+	 */
+	public GaussianPDF(double mean, double std)
+	{
+		this.mean = mean;
+		this.std = std;
+	}
+
+	/**
+	 * Evaluates {@code exp(-(mean - value)^2 / (2 * std^2)) / (std * sqrt(2 * pi))}.
+	 *
+	 * @param value point at which to evaluate the density
+	 * @return the density at {@code value}
+	 */
+	public double probability(Double value)
+	{
+		double deviation = mean - value;
+		double squaredDeviation = deviation * deviation;
+		double variance = std * std;
+
+		double kernel = Math.exp(-1.0 * squaredDeviation / (2.0 * variance));
+		double normalizer = std * Math.sqrt(2.0 * Math.PI);
+
+		return kernel / normalizer;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
new file mode 100644
index 0000000..fdf2db0
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+import java.util.List;
+
+/**
+ * Product of several probability functions over the same type, rescaled by
+ * the sum of those products over a fixed list of data supplied at
+ * construction time.
+ */
+public class JointPDF<T> implements ProbabilityDensityFunction<T>
+{
+
+	double normalizationFactor;
+	ProbabilityDensityFunction<T>[] pdfs;
+
+	/**
+	 * @param data values over which the normalization factor is summed
+	 * @param pdfs the functions whose product forms the joint weight
+	 */
+	public JointPDF(List<T> data, ProbabilityDensityFunction<T> ... pdfs)
+	{
+		// Copy the varargs array so that a caller who passed an explicit
+		// array cannot change the set of functions after construction.
+		this.pdfs = pdfs.clone();
+
+		double total = 0.0d;
+		for(T datum : data)
+		{
+			total += unnormalizedWeight(datum);
+		}
+		normalizationFactor = total;
+	}
+
+	/**
+	 * Returns the product of all component probabilities for the datum,
+	 * divided by the normalization factor computed in the constructor.
+	 * If that factor is zero (for example because the data list was empty)
+	 * the result of the division is NaN or infinite.
+	 *
+	 * @param datum the value to score
+	 * @return the normalized joint weight of {@code datum}
+	 */
+	public double probability(T datum)
+	{
+		return unnormalizedWeight(datum) / normalizationFactor;
+	}
+
+	/** Multiplies together the probabilities every component assigns to the datum. */
+	private double unnormalizedWeight(T datum)
+	{
+		double product = 1.0;
+		for(ProbabilityDensityFunction<T> pdf : pdfs)
+		{
+			product *= pdf.probability(datum);
+		}
+		return product;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java
new file mode 100644
index 0000000..7605156
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * Discrete probability function backed by an immutable table that maps each
+ * known value to its probability. Values missing from the table have
+ * probability zero.
+ */
+public class MultinomialPDF<T> implements ProbabilityDensityFunction<T>
+{
+	private final ImmutableMap<T, Double> probabilities;
+
+	/**
+	 * @param probabilities table of value to probability; it is copied, so
+	 *        later changes to the argument do not affect this object
+	 */
+	public MultinomialPDF(Map<T, Double> probabilities)
+	{
+		this.probabilities = ImmutableMap.copyOf(probabilities);
+	}
+
+	/**
+	 * @return the values that have an entry in the probability table
+	 */
+	public Set<T> getData()
+	{
+		return probabilities.keySet();
+	}
+
+	/**
+	 * @param value the value to look up
+	 * @return the tabulated probability, or {@code 0.0} when the value has
+	 *         no entry
+	 */
+	public double probability(T value)
+	{
+		// ImmutableMap rejects null values, so a null result can only mean
+		// that the key is absent; a single lookup is therefore sufficient.
+		Double tabulated = probabilities.get(value);
+		if(tabulated == null)
+		{
+			return 0.0;
+		}
+
+		return tabulated;
+	}
+
+	/**
+	 * Renders one {@code probability,value} line per table entry.
+	 */
+	@Override
+	public String toString()
+	{
+		// StringBuilder avoids re-copying the accumulated text on every entry.
+		StringBuilder str = new StringBuilder();
+		for(Map.Entry<T, Double> entry: probabilities.entrySet())
+		{
+			str.append(entry.getValue()).append(",").append(entry.getKey()).append("\n");
+		}
+		return str.toString();
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
new file mode 100644
index 0000000..1b691ca
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * A function that assigns a probability (or density) to values of type
+ * {@code T}.
+ *
+ * @param <T> type of the values being scored
+ */
+public interface ProbabilityDensityFunction<T>
+{
+	/**
+	 * Scores a single datum.
+	 *
+	 * @param datum the value to score
+	 * @return the probability (or density) the implementation assigns
+	 */
+	double probability(T datum);
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
new file mode 100644
index 0000000..ea8e77e
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/UniformPDF.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs;
+
+/**
+ * Probability function that returns the same fixed probability for every
+ * datum, regardless of its value.
+ */
+public class UniformPDF<T> implements ProbabilityDensityFunction<T>
+{
+	private final double probability;
+
+	/**
+	 * Creates a function whose constant probability is {@code 1 / count}.
+	 *
+	 * @param count number of equally likely outcomes
+	 */
+	public UniformPDF(long count)
+	{
+		this(1.0 / count);
+	}
+
+	/**
+	 * Creates a function that always returns the given probability.
+	 *
+	 * @param probability the constant value to return
+	 */
+	public UniformPDF(double probability)
+	{
+		this.probability = probability;
+	}
+
+	/**
+	 * @param datum ignored
+	 * @return the constant probability chosen at construction
+	 */
+	public double probability(T datum)
+	{
+		return this.probability;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
new file mode 100644
index 0000000..348f080
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.List;
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.commons.lang3.tuple.Pair;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Draws values from a mixture of Gaussians and rejects any draw that falls
+ * outside the closed interval {@code [min, max]}. Each mode is a pair of
+ * (mean, standard deviation) and modes are chosen with equal probability.
+ */
+public class BoundedMultiModalGaussianSampler implements Sampler<Double>
+{
+	ImmutableList<Pair<Double, Double>> distributions;
+
+	double min;
+	double max;
+	Random rng;
+
+	/**
+	 * @param distributions the modes, each as (mean, standard deviation);
+	 *        must contain at least one entry
+	 * @param min smallest value that may be returned
+	 * @param max largest value that may be returned; must not be below {@code min}
+	 * @param seedFactory source of the seed for this sampler's random generator
+	 * @throws IllegalArgumentException if no modes are given, or if the
+	 *         bounds are inverted or NaN
+	 */
+	public BoundedMultiModalGaussianSampler(List<Pair<Double, Double>> distributions, double min, double max, SeedFactory seedFactory)
+	{
+		rng = new Random(seedFactory.getNextSeed());
+		this.distributions = ImmutableList.copyOf(distributions);
+
+		// Fail fast: with no modes sample() cannot pick one, and with
+		// inverted or NaN bounds no draw can ever be accepted, so sample()
+		// would loop forever.
+		if(this.distributions.isEmpty())
+		{
+			throw new IllegalArgumentException("At least one distribution is required");
+		}
+		if(!(min <= max))
+		{
+			throw new IllegalArgumentException("Invalid bounds: min=" + min + ", max=" + max);
+		}
+
+		this.min = min;
+		this.max = max;
+	}
+
+	/**
+	 * Picks a mode uniformly at random, draws from its Gaussian, and repeats
+	 * until a draw lies within the bounds.
+	 *
+	 * @return a value in {@code [min, max]}
+	 */
+	public Double sample()
+	{
+		while(true)
+		{
+			Pair<Double, Double> mode = distributions.get(rng.nextInt(distributions.size()));
+
+			double mean = mode.getLeft();
+			double std = mode.getRight();
+
+			double value = mean + rng.nextGaussian() * std;
+
+			if (value >= this.min && value <= this.max)
+			{
+				return value;
+			}
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
new file mode 100644
index 0000000..c521333
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+/**
+ * Produces values of type {@code T} given a conditioning value of type
+ * {@code S}.
+ *
+ * @param <T> type of the sampled values
+ * @param <S> type of the value the sample is conditioned on
+ */
+public interface ConditionalSampler<T, S>
+{
+	/**
+	 * Draws one sample for the given conditioning value.
+	 *
+	 * @param conditional the value to condition on
+	 * @return the sampled value
+	 * @throws Exception if the implementation cannot produce a sample
+	 */
+	T sample(S conditional) throws Exception;
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
new file mode 100644
index 0000000..82e4d2d
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/DoubleSequenceSampler.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+public class DoubleSequenceSampler implements Sampler<Double>
+{
+	Double start;
+	Double end;
+	Double step;
+	Double next;
+	
+	public DoubleSequenceSampler()
+	{
+		start = 0.0;
+		end = null;
+		step = 1.0;
+		next = start;
+	}
+	
+	public DoubleSequenceSampler(Double start)
+	{
+		this.start = start;
+		end = null;
+		step = 1.0;
+		next = start;
+	}
+	
+	public DoubleSequenceSampler(Double start, Double end)
+	{
+		this.start = start;
+		this.end = end;
+		step = 1.0;
+		next = start;
+	}
+	
+	public DoubleSequenceSampler(Double start, Double end, Double step)
+	{
+		this.start = start;
+		this.end = end;
+		this.step = step;
+		next = start;
+	}
+	
+	public Double sample() throws Exception
+	{
+		if(end == null || next < end)
+		{
+			Double current = next;
+			next = current + step;
+			return current;
+		}
+		
+		throw new Exception("All values have been sampled");
+	}
+	
+	
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
new file mode 100644
index 0000000..082f3ac
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class ExponentialSampler implements Sampler<Double>
+{
+	final private Random rng;
+	final private double lambda;
+	
+	public ExponentialSampler(double lambda, SeedFactory seedFactory)
+	{
+		rng = new Random(seedFactory.getNextSeed());
+		this.lambda = lambda;
+	}
+	
+	public Double sample()
+	{
+		return - Math.log(1.0 - rng.nextDouble()) / lambda;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
new file mode 100644
index 0000000..ed40cc8
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+
+public class GaussianSampler implements Sampler<Double>
+{
+	double mean;
+	double std;
+	Random rng;
+	
+	public GaussianSampler(double mean, double std, SeedFactory seedFactory)
+	{
+		rng = new Random(seedFactory.getNextSeed());
+		this.mean = mean;
+		this.std = std;
+	}
+	
+	public Double sample()
+	{
+		return rng.nextGaussian() * std + mean;
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
new file mode 100644
index 0000000..0db8200
--- /dev/null
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers;
+
+import java.util.Random;
+
+import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory;
+import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction;
+
+
+/**
+ * Rejection sampler: proposes states from an underlying sampler and accepts
+ * each proposal with the probability the acceptance function assigns to it.
+ */
+public class MonteCarloSampler<T> implements Sampler<T>
+{
+	private final Sampler<T> stateSampler;
+	private final Random rng;
+	private final ProbabilityDensityFunction<T> acceptancePDF;
+
+	/**
+	 * @param stateGenerator sampler that proposes candidate states
+	 * @param acceptancePDF function giving the acceptance probability of a candidate
+	 * @param seedFactory source of the seed for this sampler's random generator
+	 */
+	public MonteCarloSampler(Sampler<T> stateGenerator,
+			ProbabilityDensityFunction<T> acceptancePDF,
+			SeedFactory seedFactory)
+	{
+		this.stateSampler = stateGenerator;
+		this.acceptancePDF = acceptancePDF;
+		this.rng = new Random(seedFactory.getNextSeed());
+	}
+
+	/**
+	 * Keeps proposing candidates until one is accepted. A candidate is
+	 * accepted when a uniform draw from [0, 1) is strictly below its
+	 * acceptance probability, so the loop does not end while every
+	 * candidate scores zero.
+	 *
+	 * @return the first accepted candidate
+	 * @throws Exception if the underlying sampler fails to produce a candidate
+	 */
+	public T sample() throws Exception
+	{
+		for(;;)
+		{
+			T candidate = stateSampler.sample();
+			double acceptance = acceptancePDF.probability(candidate);
+			double draw = rng.nextDouble();
+
+			// Rejected: try another candidate.
+			if(!(draw < acceptance))
+			{
+				continue;
+			}
+
+			return candidate;
+		}
+	}
+
+}


Mime
View raw message