Return-Path: X-Original-To: apmail-bigtop-commits-archive@www.apache.org Delivered-To: apmail-bigtop-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 10C05183A2 for ; Sun, 30 Aug 2015 19:34:57 +0000 (UTC) Received: (qmail 23821 invoked by uid 500); 30 Aug 2015 14:08:17 -0000 Delivered-To: apmail-bigtop-commits-archive@bigtop.apache.org Received: (qmail 23782 invoked by uid 500); 30 Aug 2015 14:08:17 -0000 Mailing-List: contact commits-help@bigtop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: bigtop-dev@bigtop.apache.org Delivered-To: mailing list commits@bigtop.apache.org Received: (qmail 23773 invoked by uid 99); 30 Aug 2015 14:08:17 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 30 Aug 2015 14:08:17 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 3ADE3DFF82; Sun, 30 Aug 2015 14:08:17 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: rnowling@apache.org To: commits@bigtop.apache.org Message-Id: <9b9ea7d519fb4dd5b7ef5f1fe76c11db@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: bigtop git commit: BIGTOP-1995. Update BigPetStore to use external locations data Date: Sun, 30 Aug 2015 14:08:17 +0000 (UTC) Repository: bigtop Updated Branches: refs/heads/master b25084fa7 -> 431b89516 BIGTOP-1995. Update BigPetStore to use external locations data Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/431b8951 Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/431b8951 Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/431b8951 Branch: refs/heads/master Commit: 431b895166f8f976990c66733cfa210bdaa6fee0 Parents: b25084f Author: RJ Nowling Authored: Sun Aug 30 01:35:33 2015 -0500 Committer: RJ Nowling Committed: Sun Aug 30 02:03:40 2015 -0500 ---------------------------------------------------------------------- .../bigpetstore-data-generator/build.gradle | 4 +- .../datagenerators/bigpetstore/Constants.java | 5 -- .../datagenerators/bigpetstore/DataLoader.java | 25 ++------ .../bigpetstore/datamodels/Customer.java | 8 +-- .../bigpetstore/datamodels/Store.java | 8 +-- .../datamodels/inputs/InputData.java | 8 ++- .../customer/CustomerLocationPDF.java | 20 +++--- .../generators/customer/CustomerSampler.java | 8 +-- .../customer/CustomerSamplerBuilder.java | 14 ++-- .../store/StoreLocationIncomePDF.java | 14 ++-- .../store/StoreLocationPopulationPDF.java | 10 +-- .../generators/store/StoreSampler.java | 8 +-- .../generators/store/StoreSamplerBuilder.java | 16 ++--- .../customer/TestCustomerLocationPDF.java | 11 ++-- .../customer/TestCustomerSampler.java | 24 +++---- .../customer/TestCustomerSamplerBuilder.java | 10 +-- .../store/TestStoreLocationIncomePDF.java | 13 ++-- .../store/TestStoreLocationPopulationPDF.java | 13 ++-- .../generators/store/TestStoreSampler.java | 11 ++-- .../store/TestStoreSamplerBuilder.java | 11 ++-- .../namegenerator/NameGenerator.java | 5 +- .../namegenerator/NameReader.java | 2 +- .../samplers/wfs/DiscreteWeightFunction.java | 27 ++++++++ .../samplers/wfs/MultinomialWF.java | 67 ++++++++++++++++++++ bigtop-data-generators/settings.gradle | 2 +- 25 files changed, 208 insertions(+), 136 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/build.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/build.gradle b/bigtop-data-generators/bigpetstore-data-generator/build.gradle index 0d2bead..9787512 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/build.gradle +++ b/bigtop-data-generators/bigpetstore-data-generator/build.gradle @@ -59,7 +59,9 @@ dependencies { compile 'com.google.guava:guava:18.0' compile 'com.google.code.gson:gson:2.3' compile 'org.apache.commons:commons-lang3:3.4' - compile project(":bigtop-samplers"), project(":bigtop-name-generator") + compile project(":bigtop-samplers") + compile project(":bigtop-name-generator") + compile project(":bigtop-location-data") testCompile 'junit:junit:4.+' } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java index 1e8e758..ef11299 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java @@ -15,7 +15,6 @@ */ package org.apache.bigtop.datagenerators.bigpetstore; -import java.io.File; import java.util.List; import java.util.Set; @@ -38,10 +37,6 @@ public class Constants MEDIUM; } - public static final File COORDINATES_FILE = new File("zips.csv"); - public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv"); - public static final File POPULATION_FILE = new File("population_data.csv"); - public static final ProductsCollectionSize PRODUCTS_COLLECTION = ProductsCollectionSize.MEDIUM; public static final double INCOME_SCALING_FACTOR = 100.0; http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java index ecbd6cf..e8a4023 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java @@ -15,35 +15,18 @@ */ package org.apache.bigtop.datagenerators.bigpetstore; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.InputStream; import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.datareaders.ZipcodeReader; +import org.apache.bigtop.datagenerators.locations.Location; +import org.apache.bigtop.datagenerators.locations.LocationReader; public class DataLoader { - private InputStream getResource(File filename) throws Exception - { - InputStream stream = getClass().getResourceAsStream("/input_data/" + filename); - return new BufferedInputStream(stream); - } - public InputData loadData() throws Exception { - - System.out.println("Reading zipcode data"); - ZipcodeReader zipcodeReader = new ZipcodeReader(); - zipcodeReader.setCoordinatesFile(getResource(Constants.COORDINATES_FILE)); - zipcodeReader.setIncomesFile(getResource(Constants.INCOMES_FILE)); - zipcodeReader.setPopulationFile(getResource(Constants.POPULATION_FILE)); - List zipcodeTable = zipcodeReader.readData(); - System.out.println("Read " + zipcodeTable.size() + " zipcode entries"); - - InputData inputData = new InputData(zipcodeTable); + List locations = new LocationReader().readData(); + InputData inputData = new InputData(locations); return inputData; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java index 8847a36..b46c8cd 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java @@ -17,7 +17,7 @@ package org.apache.bigtop.datagenerators.bigpetstore.datamodels; import java.io.Serializable; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; public class Customer implements Serializable @@ -26,10 +26,10 @@ public class Customer implements Serializable int id; Pair name; - ZipcodeRecord location; + Location location; Store store; - public Customer(int id, Pair name, Store store, ZipcodeRecord location) + public Customer(int id, Pair name, Store store, Location location) { this.id = id; this.name = name; @@ -47,7 +47,7 @@ public class Customer implements Serializable return name; } - public ZipcodeRecord getLocation() + public Location getLocation() { return location; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java index 61730dd..c9c4443 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java @@ -17,7 +17,7 @@ package org.apache.bigtop.datagenerators.bigpetstore.datamodels; import java.io.Serializable; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; public class Store implements Serializable { @@ -25,9 +25,9 @@ public class Store implements Serializable int id; String name; - ZipcodeRecord location; + Location location; - public Store(int id, String name, ZipcodeRecord location) + public Store(int id, String name, Location location) { this.id = id; this.name = name; @@ -44,7 +44,7 @@ public class Store implements Serializable return name; } - public ZipcodeRecord getLocation() + public Location getLocation() { return location; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java index c180136..074ea7e 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java @@ -19,18 +19,20 @@ import java.io.Serializable; import java.util.Collections; import java.util.List; +import org.apache.bigtop.datagenerators.locations.Location; + public class InputData implements Serializable { private static final long serialVersionUID = 9078989799806707788L; - List zipcodeTable; + List zipcodeTable; - public InputData(List zipcodeTable) + public InputData(List zipcodeTable) { this.zipcodeTable = Collections.unmodifiableList(zipcodeTable); } - public List getZipcodeTable() + public List getZipcodeTable() { return zipcodeTable; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java index 08cbc81..0b5c480 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java @@ -19,29 +19,29 @@ import java.util.List; import java.util.Map; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -public class CustomerLocationPDF implements ProbabilityDensityFunction +public class CustomerLocationPDF implements ProbabilityDensityFunction { - private final Map pdf; + private final Map pdf; - public CustomerLocationPDF(List zipcodes, Store store, double averageDistance) + public CustomerLocationPDF(List zipcodes, Store store, double averageDistance) { this.pdf = build(zipcodes, store, averageDistance); } - protected ImmutableMap build(List zipcodeTable, + protected ImmutableMap build(List zipcodeTable, Store store, double averageDistance) { double lambda = 1.0 / averageDistance; - Map zipcodeWeights = Maps.newHashMap(); + Map zipcodeWeights = Maps.newHashMap(); double totalWeight = 0.0; - for(ZipcodeRecord record : zipcodeTable) + for(Location record : zipcodeTable) { double dist = record.distance(store.getLocation()); @@ -50,8 +50,8 @@ public class CustomerLocationPDF implements ProbabilityDensityFunction pdf = Maps.newHashMap(); - for(ZipcodeRecord record : zipcodeTable) + Map pdf = Maps.newHashMap(); + for(Location record : zipcodeTable) { pdf.put(record, zipcodeWeights.get(record) / totalWeight); } @@ -59,7 +59,7 @@ public class CustomerLocationPDF implements ProbabilityDensityFunction private final Sampler idSampler; private final Sampler> nameSampler; private final Sampler storeSampler; - private final ConditionalSampler locationSampler; + private final ConditionalSampler locationSampler; public CustomerSampler(Sampler idSampler, Sampler> nameSampler, Sampler storeSampler, - ConditionalSampler locationSampler) + ConditionalSampler locationSampler) { this.idSampler = idSampler; this.nameSampler = nameSampler; @@ -46,7 +46,7 @@ public class CustomerSampler implements Sampler Integer id = idSampler.sample(); Pair name = nameSampler.sample(); Store store = storeSampler.sample(); - ZipcodeRecord location = locationSampler.sample(store); + Location location = locationSampler.sample(store); return new Customer(id, name, store, location); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java index 44ffa6a..ae64f0c 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java @@ -22,7 +22,7 @@ import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.namegenerator.NameGenerator; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; @@ -47,20 +47,20 @@ public class CustomerSamplerBuilder this.inputData = inputData; } - protected ConditionalSampler buildLocationSampler() + protected ConditionalSampler buildLocationSampler() { - final Map> locationSamplers = Maps.newHashMap(); + final Map> locationSamplers = Maps.newHashMap(); for(Store store : stores) { - ProbabilityDensityFunction locationPDF = new CustomerLocationPDF(inputData.getZipcodeTable(), + ProbabilityDensityFunction locationPDF = new CustomerLocationPDF(inputData.getZipcodeTable(), store, Constants.AVERAGE_CUSTOMER_STORE_DISTANCE); - Sampler locationSampler = RouletteWheelSampler.create(inputData.getZipcodeTable(), locationPDF, seedFactory); + Sampler locationSampler = RouletteWheelSampler.create(inputData.getZipcodeTable(), locationPDF, seedFactory); locationSamplers.put(store, locationSampler); } - return new ConditionalSampler() + return new ConditionalSampler() { - public ZipcodeRecord sample(Store store) throws Exception + public Location sample(Store store) throws Exception { return locationSamplers.get(store).sample(); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java index 7e56462..d3055b9 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java @@ -17,22 +17,22 @@ package org.apache.bigtop.datagenerators.bigpetstore.generators.store; import java.util.List; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; -public class StoreLocationIncomePDF implements ProbabilityDensityFunction +public class StoreLocationIncomePDF implements ProbabilityDensityFunction { double incomeNormalizationFactor; double minIncome; double k; - public StoreLocationIncomePDF(List zipcodeTable, double incomeScalingFactor) + public StoreLocationIncomePDF(List zipcodeTable, double incomeScalingFactor) { double maxIncome = 0.0; minIncome = Double.MAX_VALUE; - for(ZipcodeRecord record : zipcodeTable) + for(Location record : zipcodeTable) { maxIncome = Math.max(maxIncome, record.getMedianHouseholdIncome()); minIncome = Math.min(minIncome, record.getMedianHouseholdIncome()); @@ -41,21 +41,21 @@ public class StoreLocationIncomePDF implements ProbabilityDensityFunction +public class StoreLocationPopulationPDF implements ProbabilityDensityFunction { double populationSum = 0.0; - public StoreLocationPopulationPDF(List zipcodeTable) + public StoreLocationPopulationPDF(List zipcodeTable) { long populationSum = 0L; - for(ZipcodeRecord record : zipcodeTable) + for(Location record : zipcodeTable) { populationSum += record.getPopulation(); } @@ -35,7 +35,7 @@ public class StoreLocationPopulationPDF implements ProbabilityDensityFunction { - private final Sampler locationSampler; + private final Sampler locationSampler; private final Sampler idSampler; - public StoreSampler(Sampler idSampler, Sampler locationSampler) + public StoreSampler(Sampler idSampler, Sampler locationSampler) { this.locationSampler = locationSampler; this.idSampler = idSampler; @@ -35,7 +35,7 @@ public class StoreSampler implements Sampler { Integer id = idSampler.sample(); String name = "Store_" + id; - ZipcodeRecord location = locationSampler.sample(); + Location location = locationSampler.sample(); Store store = new Store(id, name, location); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java index 74f6698..da795ce 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java @@ -19,7 +19,7 @@ import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.JointPDF; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; @@ -29,10 +29,10 @@ import org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler; public class StoreSamplerBuilder { - private final List zipcodeTable; + private final List zipcodeTable; private final SeedFactory seedFactory; - public StoreSamplerBuilder(List zipcodeTable, SeedFactory seedFactory) + public StoreSamplerBuilder(List zipcodeTable, SeedFactory seedFactory) { this.zipcodeTable = zipcodeTable; this.seedFactory = seedFactory; @@ -42,14 +42,14 @@ public class StoreSamplerBuilder { Sampler idSampler = new SequenceSampler(); - ProbabilityDensityFunction locationPopulationPDF = + ProbabilityDensityFunction locationPopulationPDF = new StoreLocationPopulationPDF(zipcodeTable); - ProbabilityDensityFunction locationIncomePDF = + ProbabilityDensityFunction locationIncomePDF = new StoreLocationIncomePDF(zipcodeTable, Constants.INCOME_SCALING_FACTOR); - ProbabilityDensityFunction locationJointPDF = - new JointPDF(zipcodeTable, locationPopulationPDF, locationIncomePDF); + ProbabilityDensityFunction locationJointPDF = + new JointPDF(zipcodeTable, locationPopulationPDF, locationIncomePDF); - Sampler locationSampler = RouletteWheelSampler.create(zipcodeTable, locationJointPDF, seedFactory); + Sampler locationSampler = RouletteWheelSampler.create(zipcodeTable, locationJointPDF, seedFactory); return new StoreSampler(idSampler, locationSampler); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java index 60733a9..ac6b1c2 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java @@ -23,8 +23,7 @@ import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerLocationPDF; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; @@ -34,10 +33,10 @@ public class TestCustomerLocationPDF @Test public void testProbability() throws Exception { - List zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); List stores = new ArrayList(); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java index a176333..39857b4 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java @@ -26,7 +26,7 @@ import java.util.Map; import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler; @@ -41,21 +41,21 @@ import com.google.common.collect.Maps; public class TestCustomerSampler { - protected ConditionalSampler buildLocationSampler(List stores, List records, + protected ConditionalSampler buildLocationSampler(List stores, List records, SeedFactory factory) { - final Map> locationSamplers = Maps.newHashMap(); + final Map> locationSamplers = Maps.newHashMap(); for(Store store : stores) { - ProbabilityDensityFunction locationPDF = new CustomerLocationPDF(records, + ProbabilityDensityFunction locationPDF = new CustomerLocationPDF(records, store, Constants.AVERAGE_CUSTOMER_STORE_DISTANCE); - Sampler locationSampler = RouletteWheelSampler.create(records, locationPDF, factory); + Sampler locationSampler = RouletteWheelSampler.create(records, locationPDF, factory); locationSamplers.put(store, locationSampler); } - return new ConditionalSampler() + return new ConditionalSampler() { - public ZipcodeRecord sample(Store store) throws Exception + public Location sample(Store store) throws Exception { return locationSamplers.get(store).sample(); } @@ -73,10 +73,10 @@ public class TestCustomerSampler nameList.add(Pair.of("George", "George")); nameList.add(Pair.of("Fiona", "Fiona")); - List zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); List stores = new ArrayList(); @@ -90,7 +90,7 @@ public class TestCustomerSampler Sampler idSampler = new SequenceSampler(); Sampler> nameSampler = RouletteWheelSampler.createUniform(nameList, factory); Sampler storeSampler = RouletteWheelSampler.createUniform(stores, factory); - ConditionalSampler zipcodeSampler = buildLocationSampler(stores, zipcodes, factory); + ConditionalSampler zipcodeSampler = buildLocationSampler(stores, zipcodes, factory); Sampler sampler = new CustomerSampler(idSampler, nameSampler, storeSampler, zipcodeSampler); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java index 6ddaa94..47cd100 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java @@ -24,7 +24,7 @@ import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; import org.apache.commons.lang3.tuple.Pair; @@ -36,10 +36,10 @@ public class TestCustomerSamplerBuilder @Test public void testSample() throws Exception { - List zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); // don't need product categories for building customers http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java index b194c3a..741698e 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java @@ -20,8 +20,7 @@ import static org.junit.Assert.assertTrue; import java.util.Arrays; import java.util.List; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreLocationIncomePDF; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; @@ -31,15 +30,15 @@ public class TestStoreLocationIncomePDF @Test public void testProbability() throws Exception { - List zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); StoreLocationIncomePDF pdf = new StoreLocationIncomePDF(zipcodes, 100.0); - for(ZipcodeRecord record : zipcodes) + for(Location record : zipcodes) { assertTrue(pdf.probability(record) > 0.0); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java index 002e14f..aa70821 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java @@ -20,8 +20,7 @@ import static org.junit.Assert.assertTrue; import java.util.Arrays; import java.util.List; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreLocationPopulationPDF; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; @@ -31,15 +30,15 @@ public class TestStoreLocationPopulationPDF @Test public void testProbability() throws Exception { - List zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); StoreLocationPopulationPDF pdf = new StoreLocationPopulationPDF(zipcodes); - for(ZipcodeRecord record : zipcodes) + for(Location record : zipcodes) { assertTrue(pdf.probability(record) > 0.0); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java index 2594509..dc14d77 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java @@ -22,8 +22,7 @@ import java.util.Arrays; import java.util.Collection; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreSampler; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; @@ -37,10 +36,10 @@ public class TestStoreSampler @Test public void testSampler() throws Exception { - Collection zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + Collection zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); SeedFactory factory = new SeedFactory(1234); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java index f440ff6..19f6cd2 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java @@ -22,8 +22,7 @@ import java.util.Arrays; import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreSamplerBuilder; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; import org.apache.commons.lang3.tuple.Pair; @@ -35,10 +34,10 @@ public class TestStoreSamplerBuilder @Test public void testBuild() throws Exception { - List zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); assertTrue(zipcodes.size() > 0); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java index 4b5f620..e8c9ebf 100644 --- a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java +++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java @@ -18,6 +18,7 @@ package org.apache.bigtop.datagenerators.namegenerator; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; +import org.apache.bigtop.datagenerators.samplers.wfs.MultinomialWF; import org.apache.commons.lang3.tuple.Pair; public class NameGenerator implements Sampler> @@ -29,8 +30,8 @@ public class NameGenerator implements Sampler> { Names names = new NameReader().readData(); - firstNameSampler = RouletteWheelSampler.create(names.getFirstNames(), seedFactory); - lastNameSampler = RouletteWheelSampler.create(names.getLastNames(), seedFactory); + firstNameSampler = RouletteWheelSampler.create(new MultinomialWF(names.getFirstNames()).normalize(), seedFactory); + lastNameSampler = RouletteWheelSampler.create(new MultinomialWF(names.getLastNames()).normalize(), seedFactory); } public Pair sample() throws Exception http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java index d53529f..548cf91 100644 --- a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java +++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java @@ -52,7 +52,7 @@ public class NameReader String[] cols = line.trim().split(","); String name = cols[0]; - double weight = Double.parseDouble(cols[5]); + double weight = 1.0 / (Double.parseDouble(cols[5]) + 1.0); if(cols[4].equals("1")) firstNames.put(name, weight); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java new file mode 100644 index 0000000..a68acb9 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.wfs; + +import java.util.Set; + +import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; + +public interface DiscreteWeightFunction extends WeightFunction +{ + public ProbabilityDensityFunction normalize(); + + public Set getData(); +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java new file mode 100644 index 0000000..e4a2a35 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.wfs; + +import java.util.Map; +import java.util.Set; + +import org.apache.bigtop.datagenerators.samplers.pdfs.MultinomialPDF; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; + +public class MultinomialWF implements DiscreteWeightFunction +{ + private final ImmutableMap weights; + + public MultinomialWF(Map probabilities) + { + this.weights = ImmutableMap.copyOf(probabilities); + } + + public Set getData() + { + return weights.keySet(); + } + + public double weight(T value) + { + if(weights.containsKey(value)) + { + return weights.get(value); + } + + return 0.0; + } + + public MultinomialPDF normalize() + { + double sum = 0.0; + for(double w : weights.values()) + { + sum += w; + } + + Map probabilities = Maps.newHashMap(); + for(Map.Entry entry : weights.entrySet()) + { + probabilities.put(entry.getKey(), entry.getValue() / sum); + } + + return new MultinomialPDF(probabilities); + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/settings.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/settings.gradle b/bigtop-data-generators/settings.gradle index ad6c61d..a0e23a3 100644 --- a/bigtop-data-generators/settings.gradle +++ b/bigtop-data-generators/settings.gradle @@ -13,4 +13,4 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -include ":bigpetstore-data-generator", ":bigtop-name-generator", ":bigtop-samplers" \ No newline at end of file +include ":bigpetstore-data-generator", ":bigtop-name-generator", ":bigtop-samplers", ":bigtop-location-data"