bigtop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject [1/2] BIGTOP-1272: Productionize the mahout recommender
Date Wed, 20 Aug 2014 03:13:42 GMT
Repository: bigtop
Updated Branches:
  refs/heads/master e9771e613 -> 4fca4573b


http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java
deleted file mode 100755
index 0ea81ee..0000000
--- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java
+++ /dev/null
@@ -1,468 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.bigtop.bigpetstore.generator;
-
-
-import java.util.Date;
-import java.util.Iterator;
-import java.util.Random;
-
-import org.apache.bigtop.bigpetstore.util.Pair;
-import org.apache.bigtop.bigpetstore.util.StringUtils;
-
-/**
- * This class generates our data. Over time we will use it to embed bias which
- * can then be teased out, e.g. by clustering/classifiers. For example:
- *
- * certain products <--> certain years or days
- *
- *
- */
-public class TransactionIteratorFactory {
-
-    /**
-     * Each "state" has a pet store, with a certain "proportion" of the
-     * transactions. In this case California (CA, .3f) has the largest share
-     * of the transactions.
-     */
-
-    public static enum STATE {
-
-        // Each entry is "<product name>_<base price>", e.g. "dog-food_10".
-        // That is just to make it easy to add new products.
-        // The leading float is the state's relative probability.
-        AZ(.1f, "dog-food_10", "cat-food_8", "leather-collar_25",
-                "snake-bite ointment_30", "turtle-food_11"),
-        AK(.1f,
-                "dog-food_10", "cat-food_8", "fuzzy-collar_19",
-                "antelope-caller_20", "salmon-bait_30"),
-        CT(.1f, "dog-food_10",
-                "cat-food_8", "fuzzy-collar_19", "turtle-pellets_5"),
-        OK(.1f,
-                "dog-food_10", "cat-food_8", "duck-caller_13",
-                "rodent-cage_40", "hay-bail_5", "cow-dung_2"),
-        CO(.1f,
-                "dog-food_10", "cat-food_8", "choke-collar_15",
-                "antelope snacks_30", "duck-caller_18"),
-        CA(.3f, "dog-food_10",
-                "cat-food_8", "fish-food_12", "organic-dog-food_16",
-                "turtle-pellets_5"),
-        NY(.2f, "dog-food_10", "cat-food_8", "steel-leash_20",
-                "fish-food_20", "seal-spray_25");
-
-        public static Random rand = new Random(); // unseeded; shared by all constants
-        public float probability; // relative weight given as the first constructor argument
-        public String[] products; // the "<name>_<base price>" strings of this state
-
-        private STATE(float probability, String... products) {
-            this.probability = probability;
-            this.products = products;
-        }
-        /** Picks one of this state's products at random; returns (name, base price). */
-        public Pair<String, Integer> randProduct() {
-            String product = products[rand.nextInt(products.length - 1)]; // NOTE(review): bound is length - 1, so the last entry is never picked
-            String name = StringUtils.substringBefore(product, "_");
-            Integer basePrice = Integer.parseInt(StringUtils.substringAfter(
-                    product, "_"));
-            return new Pair(name, basePrice); // raw Pair: no type arguments are given here
-        }
-
-    }
-
-    public static class KeyVal<K, V> { // plain holder for one key and one value
-
-        public final K key; // set once in the constructor
-        public final V val; // set once in the constructor
-
-        public KeyVal(K key, V val) {
-            this.key = key;
-            this.val = val;
-        }
-    }
-
-    private Iterator<KeyVal<String, String>> dataIterator;
-
-    Random r;
-
-    public TransactionIteratorFactory(final int records, final STATE state) {
-        // Builds the iterator handed out by getData(): 'records' transactions for 'state'.
-        /**
-         * Random is seeded with state.hashCode(), i.e. one seed per STATE.
-         * NOTE(review): Enum.hashCode() is identity-based, so the seed may vary between JVM runs - confirm.
-         */
-        r = new Random(state.hashCode());
-
-        if (records == 0) { // only 0 is rejected; a negative count makes hasNext() false at once
-            throw new RuntimeException(
-                    "Cant create a data iterator with no records (records==0) !");
-        }
-
-        this.dataIterator = new Iterator<KeyVal<String, String>>() {
-            int trans_id = 1; // id of the next transaction; incremented in next()
-
-            @Override
-            public boolean hasNext() {
-                // true until 'records' transactions have been produced
-                return trans_id <= records;
-            }
-
-            int repeat = 0; // how many more transactions reuse the current name
-            String fname = randFirstName();
-            String lname = randLastName();
-
-            @Override
-            public KeyVal<String, String> next() {
-                /**
-                 * Some customers come back for more :) While repeat > 0 the
-                 * current name is reused; otherwise a new name and count are drawn.
-                 */
-                if (repeat > 0)
-                    repeat--;
-                else {
-                    fname = randFirstName();
-                    lname = randLastName();
-                    repeat = (int) (r.nextGaussian() * 10f); // NOTE(review): Gaussian draw; may be negative or exceed 10
-                }
-                String key, val; // key: BigPetStore,storeCode_<STATE>,<id>  val: first,last,date,price,product
-                key = join(",", "BigPetStore", "storeCode_" + state.name(),
-                        trans_id++ + "");
-                Pair<String, Integer> product_price = state.randProduct();
-                val = join(
-                        ",",
-                        fname,
-                        lname,
-                        getDate().toString(),
-                        fudgePrice(product_price.getFirst(),
-                                product_price.getSecond())
-                                + "", product_price.getFirst()); // products are
-                                                                 // biased by
-                                                                 // state
-
-                return new KeyVal<String, String>(key, val);
-            }
-
-            @Override
-            public void remove() {
-                // intentionally empty: a remove() call is silently ignored
-
-            }
-
-        };
-    }
-
-    /**
-     * Adds a cents offset to a whole-number base price, chosen by product name:
-     * contains "dog" +.50, "cat" -.50, "fish" -.25, otherwise +.10 (first match wins).
-     * @param i the base price; {@code product} is the name searched for the keywords
-     * @return the base price with the offset applied
-     */
-    public Float fudgePrice(String product, Integer i) {
-        float f = (float) i; // unused: the returns below work on i directly
-        if (product.contains("dog")) {
-            return i + .50f;
-        }
-        if (product.contains("cat")) {
-            return i - .50f;
-        }
-        if (product.contains("fish")) {
-            return i - .25f;
-        } else
-            return i + .10f;
-    }
-
-    static String join(String sep, String... strs) { // concatenates strs with "," between them
-        if (strs.length == 0) {
-            return "";
-        } else if (strs.length == 1) {
-            return strs[0];
-        }
-        String temp = strs[0]; // inefficient ... should probably use
-                               // StringBuilder instead
-        for (int i = 1; i < strs.length; i++) {
-            temp += "," + strs[i]; // NOTE(review): 'sep' is never read; the separator is hard-coded
-        }
-        return temp;
-    }
-
-    public Iterator<KeyVal<String, String>> getData() { // exposes the iterator built in the constructor
-        return this.dataIterator; // same instance on every call, not a fresh iterator
-    }
-
-    private String randFirstName() { // random lower-cased entry of FIRSTNAMES, drawn from r
-        return FIRSTNAMES[this.r.nextInt(FIRSTNAMES.length - 1)].toLowerCase(); // NOTE(review): bound is length - 1, so the last name is never picked
-    }
-
-    private String randLastName() { // random lower-cased entry of LASTNAMES, drawn from r
-        return LASTNAMES[this.r.nextInt(LASTNAMES.length - 1)].toLowerCase(); // NOTE(review): bound is length - 1, so the last name is never picked
-    }
-
-    private Date getDate() { // random Date drawn from r
-        return new Date(this.r.nextInt()); // NOTE(review): an int of milliseconds stays within about 25 days of the 1970 epoch
-    }
-
-    private Integer getPrice() { // not called anywhere in this class
-        return this.r.nextInt(MAX_PRICE); // value in [0, MAX_PRICE)
-    }
-
-    public static final Integer MINUTES_IN_DAY = 60 * 24;
-    public static final Integer MAX_PRICE = 10000;
-
-    private static String[] FIRSTNAMES = { "Aaron", "Abby", "Abigail", "Adam",
-            "Alan", "Albert", "Alex", "Alexandra", "Alexis", "Alice", "Alicia",
-            "Alisha", "Alissa", "Allen", "Allison", "Alyssa", "Amanda",
-            "Amber", "Amy", "Andrea", "Andrew", "Andy", "Angel", "Angela",
-            "Angie", "Anita", "Ann", "Anna", "Annette", "Anthony", "Antonio",
-            "April", "Arthur", "Ashley", "Audrey", "Austin", "Autumn", "Baby",
-            "Barb", "Barbara", "Becky", "Benjamin", "Beth", "Bethany", "Betty",
-            "Beverly", "Bill", "Billie", "Billy", "Blake", "Bob", "Bobbie",
-            "Bobby", "Bonnie", "Brad", "Bradley", "Brady", "Brandi", "Brandon",
-            "Brandy", "Breanna", "Brenda", "Brent", "Brett", "Brian",
-            "Brianna", "Brittany", "Brooke", "Brooklyn", "Bruce", "Bryan",
-            "Caleb", "Cameron", "Candy", "Carl", "Carla", "Carmen", "Carol",
-            "Carolyn", "Carrie", "Casey", "Cassandra", "Catherine", "Cathy",
-            "Chad", "Charlene", "Charles", "Charlie", "Charlotte", "Chase",
-            "Chasity", "Chastity", "Chelsea", "Cheryl", "Chester", "Cheyenne",
-            "Chris", "Christian", "Christina", "Christine", "Christoph",
-            "Christopher", "Christy", "Chuck", "Cindy", "Clara", "Clarence",
-            "Clayton", "Clifford", "Clint", "Cody", "Colton", "Connie",
-            "Corey", "Cory", "Courtney", "Craig", "Crystal", "Curtis",
-            "Cynthia", "Dakota", "Dale", "Dallas", "Dalton", "Dan", "Dana",
-            "Daniel", "Danielle", "Danny", "Darla", "Darlene", "Darrell",
-            "Darren", "Dave", "David", "Dawn", "Dean", "Deanna", "Debbie",
-            "Deborah", "Debra", "Denise", "Dennis", "Derek", "Derrick",
-            "Destiny", "Devin", "Diana", "Diane", "Dillon", "Dixie", "Dominic",
-            "Don", "Donald", "Donna", "Donnie", "Doris", "Dorothy", "Doug",
-            "Douglas", "Drew", "Duane", "Dustin", "Dusty", "Dylan", "Earl",
-            "Ed", "Eddie", "Edward", "Elaine", "Elizabeth", "Ellen", "Emily",
-            "Eric", "Erica", "Erika", "Erin", "Ernest", "Ethan", "Eugene",
-            "Eva", "Evelyn", "Everett", "Faith", "Father", "Felicia", "Floyd",
-            "Francis", "Frank", "Fred", "Gabriel", "Gage", "Gail", "Gary",
-            "Gene", "George", "Gerald", "Gina", "Ginger", "Glen", "Glenn",
-            "Gloria", "Grace", "Greg", "Gregory", "Haley", "Hannah", "Harley",
-            "Harold", "Harry", "Heath", "Heather", "Heidi", "Helen", "Herbert",
-            "Holly", "Hope", "Howard", "Hunter", "Ian", "Isaac", "Jack",
-            "Jackie", "Jacob", "Jade", "Jake", "James", "Jamie", "Jan", "Jane",
-            "Janet", "Janice", "Jared", "Jasmine", "Jason", "Jay", "Jean",
-            "Jeannie", "Jeff", "Jeffery", "Jeffrey", "Jenna", "Jennifer",
-            "Jenny", "Jeremiah", "Jeremy", "Jerry", "Jesse", "Jessica",
-            "Jessie", "Jill", "Jim", "Jimmy", "Joann", "Joanne", "Jodi",
-            "Jody", "Joe", "Joel", "Joey", "John", "Johnathan", "Johnny",
-            "Jon", "Jonathan", "Jonathon", "Jordan", "Joseph", "Josh",
-            "Joshua", "Joyce", "Juanita", "Judy", "Julia", "Julie", "Justin",
-            "Kaitlyn", "Karen", "Katelyn", "Katherine", "Kathleen", "Kathryn",
-            "Kathy", "Katie", "Katrina", "Kay", "Kayla", "Kaylee", "Keith",
-            "Kelly", "Kelsey", "Ken", "Kendra", "Kenneth", "Kenny", "Kevin",
-            "Kim", "Kimberly", "Kris", "Krista", "Kristen", "Kristin",
-            "Kristina", "Kristy", "Kyle", "Kylie", "Lacey", "Laken", "Lance",
-            "Larry", "Laura", "Lawrence", "Leah", "Lee", "Leonard", "Leroy",
-            "Leslie", "Levi", "Lewis", "Linda", "Lindsay", "Lindsey", "Lisa",
-            "Lloyd", "Logan", "Lois", "Loretta", "Lori", "Louis", "Lynn",
-            "Madison", "Mandy", "Marcus", "Margaret", "Maria", "Mariah",
-            "Marie", "Marilyn", "Marion", "Mark", "Marlene", "Marsha",
-            "Martha", "Martin", "Marty", "Marvin", "Mary", "Mary ann", "Mason",
-            "Matt", "Matthew", "Max", "Megan", "Melanie", "Melinda", "Melissa",
-            "Melody", "Michael", "Michelle", "Mickey", "Mike", "Mindy",
-            "Miranda", "Misty", "Mitchell", "Molly", "Monica", "Morgan",
-            "Mother", "Myron", "Nancy", "Natasha", "Nathan", "Nicholas",
-            "Nick", "Nicole", "Nina", "Noah", "Norma", "Norman", "Olivia",
-            "Paige", "Pam", "Pamela", "Pat", "Patricia", "Patrick", "Patty",
-            "Paul", "Paula", "Peggy", "Penny", "Pete", "Phillip", "Phyllis",
-            "Rachael", "Rachel", "Ralph", "Randall", "Randi", "Randy", "Ray",
-            "Raymond", "Rebecca", "Regina", "Renee", "Rex", "Rhonda",
-            "Richard", "Rick", "Ricky", "Rita", "Rob", "Robbie", "Robert",
-            "Roberta", "Robin", "Rochelle", "Rocky", "Rod", "Rodney", "Roger",
-            "Ron", "Ronald", "Ronda", "Ronnie", "Rose", "Roxanne", "Roy",
-            "Russ", "Russell", "Rusty", "Ruth", "Ryan", "Sabrina", "Sally",
-            "Sam", "Samantha", "Samuel", "Sandra", "Sandy", "Sara", "Sarah",
-            "Savannah", "Scott", "Sean", "Seth", "Shanda", "Shane", "Shanna",
-            "Shannon", "Sharon", "Shaun", "Shawn", "Shawna", "Sheila",
-            "Shelly", "Sher", "Sherri", "Sherry", "Shirley", "Sierra",
-            "Skyler", "Stacey", "Stacy", "Stanley", "Stephanie", "Stephen",
-            "Steve", "Steven", "Sue", "Summer", "Susan", "Sydney", "Tabatha",
-            "Tabitha", "Tamara", "Tammy", "Tara", "Tasha", "Tashia", "Taylor",
-            "Ted", "Teresa", "Terri", "Terry", "Tessa", "Thelma", "Theresa",
-            "Thomas", "Tia", "Tiffany", "Tim", "Timmy", "Timothy", "Tina",
-            "Todd", "Tom", "Tommy", "Toni", "Tony", "Tonya", "Tracey",
-            "Tracie", "Tracy", "Travis", "Trent", "Trevor", "Trey", "Trisha",
-            "Tristan", "Troy", "Tyler", "Tyrone", "Unborn", "Valerie",
-            "Vanessa", "Vernon", "Veronica", "Vicki", "Vickie", "Vicky",
-            "Victor", "Victoria", "Vincent", "Virginia", "Vivian", "Walter",
-            "Wanda", "Wayne", "Wendy", "Wesley", "Whitney", "William",
-            "Willie", "Wyatt", "Zachary" };
-
-    public static String[] LASTNAMES = { "Abbott", "Acevedo", "Acosta",
-            "Adams", "Adkins", "Aguilar", "Aguirre", "Albert", "Alexander",
-            "Alford", "Allen", "Allison", "Alston", "Alvarado", "Alvarez",
-            "Anderson", "Andrews", "Anthony", "Armstrong", "Arnold", "Ashley",
-            "Atkins", "Atkinson", "Austin", "Avery", "Avila", "Ayala", "Ayers",
-            "Bailey", "Baird", "Baker", "Baldwin", "Ball", "Ballard", "Banks",
-            "Barber", "Smith", "Johnson", "Williams", "Jones", "Brown",
-            "Davis", "Miller", "Wilson", "Moore", "Taylor", "Thomas",
-            "Jackson", "Barker", "Barlow", "Barnes", "Barnett", "Barr",
-            "Barrera", "Barrett", "Barron", "Barry", "Bartlett", "Barton",
-            "Bass", "Bates", "Battle", "Bauer", "Baxter", "Beach", "Bean",
-            "Beard", "Beasley", "Beck", "Becker", "Bell", "Bender", "Benjamin",
-            "Bennett", "Benson", "Bentley", "Benton", "Berg", "Berger",
-            "Bernard", "Berry", "Best", "Bird", "Bishop", "Black", "Blackburn",
-            "Blackwell", "Blair", "Blake", "Blanchard", "Blankenship",
-            "Blevins", "Bolton", "Bond", "Bonner", "Booker", "Boone", "Booth",
-            "Bowen", "Bowers", "Bowman", "Boyd", "Boyer", "Boyle", "Bradford",
-            "Bradley", "Bradshaw", "Brady", "Branch", "Bray", "Brennan",
-            "Brewer", "Bridges", "Briggs", "Bright", "Britt", "Brock",
-            "Brooks", "Browning", "Bruce", "Bryan", "Bryant", "Buchanan",
-            "Buck", "Buckley", "Buckner", "Bullock", "Burch", "Burgess",
-            "Burke", "Burks", "Burnett", "Burns", "Burris", "Burt", "Burton",
-            "Bush", "Butler", "Byers", "Byrd", "Cabrera", "Cain", "Calderon",
-            "Caldwell", "Calhoun", "Callahan", "Camacho", "Cameron",
-            "Campbell", "Campos", "Cannon", "Cantrell", "Cantu", "Cardenas",
-            "Carey", "Carlson", "Carney", "Carpenter", "Carr", "Carrillo",
-            "Carroll", "Carson", "Carter", "Carver", "Case", "Casey", "Cash",
-            "Castaneda", "Castillo", "Castro", "Cervantes", "Chambers", "Chan",
-            "Chandler", "Chaney", "Chang", "Chapman", "Charles", "Chase",
-            "Chavez", "Chen", "Cherry", "Christensen", "Christian", "Church",
-            "Clark", "Clarke", "Clay", "Clayton", "Clements", "Clemons",
-            "Cleveland", "Cline", "Cobb", "Cochran", "Coffey", "Cohen", "Cole",
-            "Coleman", "Collier", "Collins", "Colon", "Combs", "Compton",
-            "Conley", "Conner", "Conrad", "Contreras", "Conway", "Cook",
-            "Cooke", "Cooley", "Cooper", "Copeland", "Cortez", "Cote",
-            "Cotton", "Cox", "Craft", "Craig", "Crane", "Crawford", "Crosby",
-            "Cross", "Cruz", "Cummings", "Cunningham", "Curry", "Curtis",
-            "Dale", "Dalton", "Daniel", "Daniels", "Daugherty", "Davenport",
-            "David", "Davidson", "Dawson", "Day", "Dean", "Decker", "Dejesus",
-            "Delacruz", "Delaney", "Deleon", "Delgado", "Dennis", "Diaz",
-            "Dickerson", "Dickinson", "Dillard", "Dillon", "Dixon", "Dodson",
-            "Dominguez", "Donaldson", "Donovan", "Dorsey", "Dotson", "Douglas",
-            "Downs", "Doyle", "Drake", "Dudley", "Duffy", "Duke", "Duncan",
-            "Dunlap", "Dunn", "Duran", "Durham", "Dyer", "Eaton", "Edwards",
-            "Elliott", "Ellis", "Ellison", "Emerson", "England", "English",
-            "Erickson", "Espinoza", "Estes", "Estrada", "Evans", "Everett",
-            "Ewing", "Farley", "Farmer", "Farrell", "Faulkner", "Ferguson",
-            "Fernandez", "Ferrell", "Fields", "Figueroa", "Finch", "Finley",
-            "Fischer", "Fisher", "Fitzgerald", "Fitzpatrick", "Fleming",
-            "Fletcher", "Flores", "Flowers", "Floyd", "Flynn", "Foley",
-            "Forbes", "Ford", "Foreman", "Foster", "Fowler", "Fox", "Francis",
-            "Franco", "Frank", "Franklin", "Franks", "Frazier", "Frederick",
-            "Freeman", "French", "Frost", "Fry", "Frye", "Fuentes", "Fuller",
-            "Fulton", "Gaines", "Gallagher", "Gallegos", "Galloway", "Gamble",
-            "Garcia", "Gardner", "Garner", "Garrett", "Garrison", "Garza",
-            "Gates", "Gay", "Gentry", "George", "Gibbs", "Gibson", "Gilbert",
-            "Giles", "Gill", "Gillespie", "Gilliam", "Gilmore", "Glass",
-            "Glenn", "Glover", "Goff", "Golden", "Gomez", "Gonzales",
-            "Gonzalez", "Good", "Goodman", "Goodwin", "Gordon", "Gould",
-            "Graham", "Grant", "Graves", "Gray", "Green", "Greene", "Greer",
-            "Gregory", "Griffin", "Griffith", "Grimes", "Gross", "Guerra",
-            "Guerrero", "Guthrie", "Gutierrez", "Guy", "Guzman", "Hahn",
-            "Hale", "Haley", "Hall", "Hamilton", "Hammond", "Hampton",
-            "Hancock", "Haney", "Hansen", "Hanson", "Hardin", "Harding",
-            "Hardy", "Harmon", "Harper", "Harris", "Harrington", "Harrison",
-            "Hart", "Hartman", "Harvey", "Hatfield", "Hawkins", "Hayden",
-            "Hayes", "Haynes", "Hays", "Head", "Heath", "Hebert", "Henderson",
-            "Hendricks", "Hendrix", "Henry", "Hensley", "Henson", "Herman",
-            "Hernandez", "Herrera", "Herring", "Hess", "Hester", "Hewitt",
-            "Hickman", "Hicks", "Higgins", "Hill", "Hines", "Hinton", "Hobbs",
-            "Hodge", "Hodges", "Hoffman", "Hogan", "Holcomb", "Holden",
-            "Holder", "Holland", "Holloway", "Holman", "Holmes", "Holt",
-            "Hood", "Hooper", "Hoover", "Hopkins", "Hopper", "Horn", "Horne",
-            "Horton", "House", "Houston", "Howard", "Howe", "Howell",
-            "Hubbard", "Huber", "Hudson", "Huff", "Huffman", "Hughes", "Hull",
-            "Humphrey", "Hunt", "Hunter", "Hurley", "Hurst", "Hutchinson",
-            "Hyde", "Ingram", "Irwin", "Jacobs", "Jacobson", "James", "Jarvis",
-            "Jefferson", "Jenkins", "Jennings", "Jensen", "Jimenez", "Johns",
-            "Johnston", "Jordan", "Joseph", "Joyce", "Joyner", "Juarez",
-            "Justice", "Kane", "Kaufman", "Keith", "Keller", "Kelley", "Kelly",
-            "Kemp", "Kennedy", "Kent", "Kerr", "Key", "Kidd", "Kim", "King",
-            "Kinney", "Kirby", "Kirk", "Kirkland", "Klein", "Kline", "Knapp",
-            "Knight", "Knowles", "Knox", "Koch", "Kramer", "Lamb", "Lambert",
-            "Lancaster", "Landry", "Lane", "Lang", "Langley", "Lara", "Larsen",
-            "Larson", "Lawrence", "Lawson", "Le", "Leach", "Leblanc", "Lee",
-            "Leon", "Leonard", "Lester", "Levine", "Levy", "Lewis", "Lindsay",
-            "Lindsey", "Little", "Livingston", "Lloyd", "Logan", "Long",
-            "Lopez", "Lott", "Love", "Lowe", "Lowery", "Lucas", "Luna",
-            "Lynch", "Lynn", "Lyons", "Macdonald", "Macias", "Mack", "Madden",
-            "Maddox", "Maldonado", "Malone", "Mann", "Manning", "Marks",
-            "Marquez", "Marsh", "Marshall", "Martin", "Martinez", "Mason",
-            "Massey", "Mathews", "Mathis", "Matthews", "Maxwell", "May",
-            "Mayer", "Maynard", "Mayo", "Mays", "McBride", "McCall",
-            "McCarthy", "McCarty", "McClain", "McClure", "McConnell",
-            "McCormick", "McCoy", "McCray", "McCullough", "McDaniel",
-            "McDonald", "McDowell", "McFadden", "McFarland", "McGee",
-            "McGowan", "McGuire", "McIntosh", "McIntyre", "McKay", "McKee",
-            "McKenzie", "McKinney", "McKnight", "McLaughlin", "McLean",
-            "McLeod", "McMahon", "McMillan", "McNeil", "McPherson", "Meadows",
-            "Medina", "Mejia", "Melendez", "Melton", "Mendez", "Mendoza",
-            "Mercado", "Mercer", "Merrill", "Merritt", "Meyer", "Meyers",
-            "Michael", "Middleton", "Miles", "Mills", "Miranda", "Mitchell",
-            "Molina", "Monroe", "Montgomery", "Montoya", "Moody", "Moon",
-            "Mooney", "Morales", "Moran", "Moreno", "Morgan", "Morin",
-            "Morris", "Morrison", "Morrow", "Morse", "Morton", "Moses",
-            "Mosley", "Moss", "Mueller", "Mullen", "Mullins", "Munoz",
-            "Murphy", "Murray", "Myers", "Nash", "Navarro", "Neal", "Nelson",
-            "Newman", "Newton", "Nguyen", "Nichols", "Nicholson", "Nielsen",
-            "Nieves", "Nixon", "Noble", "Noel", "Nolan", "Norman", "Norris",
-            "Norton", "Nunez", "Obrien", "Ochoa", "Oconnor", "Odom",
-            "Odonnell", "Oliver", "Olsen", "Olson", "O'neal", "O'neil",
-            "O'neill", "Orr", "Ortega", "Ortiz", "Osborn", "Osborne", "Owen",
-            "Owens", "Pace", "Pacheco", "Padilla", "Page", "Palmer", "Park",
-            "Parker", "Parks", "Parrish", "Parsons", "Pate", "Patel",
-            "Patrick", "Patterson", "Patton", "Paul", "Payne", "Pearson",
-            "Peck", "Pena", "Pennington", "Perez", "Perkins", "Perry",
-            "Peters", "Petersen", "Peterson", "Petty", "Phelps", "Phillips",
-            "Pickett", "Pierce", "Pittman", "Pitts", "Pollard", "Poole",
-            "Pope", "Porter", "Potter", "Potts", "Powell", "Powers", "Pratt",
-            "Preston", "Price", "Prince", "Pruitt", "Puckett", "Pugh", "Quinn",
-            "Ramirez", "Ramos", "Ramsey", "Randall", "Randolph", "Rasmussen",
-            "Ratliff", "Ray", "Raymond", "Reed", "Reese", "Reeves", "Reid",
-            "Reilly", "Reyes", "Reynolds", "Rhodes", "Rice", "Rich", "Richard",
-            "Richards", "Richardson", "Richmond", "Riddle", "Riggs", "Riley",
-            "Rios", "Rivas", "Rivera", "Rivers", "Roach", "Robbins",
-            "Roberson", "Roberts", "Robertson", "Robinson", "Robles", "Rocha",
-            "Rodgers", "Rodriguez", "Rodriquez", "Rogers", "Rojas", "Rollins",
-            "Roman", "Romero", "Rosa", "Rosales", "Rosario", "Rose", "Ross",
-            "Roth", "Rowe", "Rowland", "Roy", "Ruiz", "Rush", "Russell",
-            "Russo", "Rutledge", "Ryan", "Salas", "Salazar", "Salinas",
-            "Sampson", "Sanchez", "Sanders", "Sandoval", "Sanford", "Santana",
-            "Santiago", "Santos", "Sargent", "Saunders", "Savage", "Sawyer",
-            "Schmidt", "Schneider", "Schroeder", "Schultz", "Schwartz",
-            "Scott", "Sears", "Sellers", "Serrano", "Sexton", "Shaffer",
-            "Shannon", "Sharp", "Sharpe", "Shaw", "Shelton", "Shepard",
-            "Shepherd", "Sheppard", "Sherman", "Shields", "Short", "Silva",
-            "Simmons", "Simon", "Simpson", "Sims", "Singleton", "Skinner",
-            "Slater", "Sloan", "Small", "Snider", "Snow", "Snyder", "Solis",
-            "Solomon", "Sosa", "Soto", "Sparks", "Spears", "Spence", "Spencer",
-            "Stafford", "Stanley", "Stanton", "Stark", "Steele", "Stein",
-            "Stephens", "Stephenson", "Stevens", "Stevenson", "Stewart",
-            "Stokes", "Stone", "Stout", "Strickland", "Strong", "Stuart",
-            "Suarez", "Sullivan", "Summers", "Sutton", "Swanson", "Sweeney",
-            "Sweet", "Sykes", "Talley", "Tanner", "Tate", "Terrell", "Terry",
-            "Thompson", "Thornton", "Tillman", "Todd", "Torres", "Townsend",
-            "Tran", "Travis", "Trevino", "Trujillo", "Tucker", "Turner",
-            "Tyler", "Tyson", "Underwood", "Valdez", "Valencia", "Valentine",
-            "Valenzuela", "Vance", "Vang", "Vargas", "Vasquez", "Vaughan",
-            "Vaughn", "Vazquez", "Vega", "Velasquez", "Velazquez", "Velez",
-            "Van halen", "Vincent", "Vinson", "Wade", "Wagner", "Walker",
-            "Wall", "Wallace", "Waller", "Walls", "Walsh", "Walter", "Walters",
-            "Walton", "Ward", "Ware", "Warner", "Warren", "Washington",
-            "Waters", "Watkins", "Watson", "Watts", "Weaver", "Webb", "Weber",
-            "Webster", "Weeks", "Weiss", "Welch", "Wells", "West", "Wheeler",
-            "Whitaker", "White", "Whitehead", "Whitfield", "Whitley",
-            "Whitney", "Wiggins", "Wilcox", "Wilder", "Wiley", "Wilkerson",
-            "Wilkins", "Wilkinson", "William", "Williamson", "Willis",
-            "Winters", "Wise", "Witt", "Wolf", "Wolfe", "Wong", "Wood",
-            "Woodard", "Woods", "Woodward", "Wooten", "Workman", "Wright",
-            "Wyatt", "Wynn", "Yang", "Yates", "York", "Young", "Zamora",
-            "Zimmerman"
-    };
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java
new file mode 100644
index 0000000..9ef3d67
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java
@@ -0,0 +1,63 @@
+package org.apache.bigtop.bigpetstore.generator.util;
+
+import java.math.BigDecimal;
+import static org.apache.bigtop.bigpetstore.generator.util.ProductType.*;
+
+public enum Product { // product catalog: each constant carries a ProductType and a price
+  DOG_FOOD(DOG, 10.50),
+  ORGANIC_DOG_FOOD(DOG, 16.99),
+  STEEL_LEASH(DOG, 19.99),
+  FUZZY_COLLAR(DOG, 24.90),
+  LEATHER_COLLAR(DOG, 18.90),
+  CHOKE_COLLAR(DOG, 15.50),
+  DOG_HOUSE(DOG, 109.99),
+  CHEWY_BONE(DOG, 20.10),
+  DOG_VEST(DOG, 19.99),
+  DOG_SOAP(DOG, 5.45),
+
+  CAT_FOOD(CAT, 7.50),
+  FEEDER_BOWL(CAT, 10.99),
+  LITTER_BOX(CAT, 24.95),
+  CAT_COLLAR(CAT, 7.95),
+  CAT_BLANKET(CAT, 14.49),
+
+  TURTLE_PELLETS(TURTLE, 4.95),
+  TURTLE_FOOD(TURTLE, 10.90),
+  TURTLE_TUB(TURTLE, 40.45),
+
+  FISH_FOOD(FISH, 12.50),
+  SALMON_BAIT(FISH, 29.95),
+  FISH_BOWL(FISH, 20.99),
+  AIR_PUMP(FISH, 13.95),
+  FILTER(FISH, 34.95),
+
+  DUCK_COLLAR(DUCK, 13.25),
+  DUCK_FOOD(DUCK, 20.25),
+  WADING_POOL(DUCK, 45.90);
+
+  /* Disabled constants: they reference OTHER, which ProductType does not declare.
+  ANTELOPE_COLLAR(OTHER, 19.90),
+  ANTELOPE_SNACKS(OTHER, 29.25),
+  RODENT_CAGE(OTHER, 39.95),
+  HAY_BALE(OTHER, 4.95),
+  COW_DUNG(OTHER, 1.95),
+  SEAL_SPRAY(OTHER, 24.50),
+  SNAKE_BITE_OINTMENT(OTHER, 29.90);
+  */
+  private final BigDecimal price; // read through price()
+  public final ProductType productType; // read directly by ProductType when it filters constants
+  private Product(ProductType productType, double price) {
+    this.price = BigDecimal.valueOf(price); // converts the double literal from the declaration
+    this.productType = productType;
+  }
+  /** Returns this constant's ordinal, so ids shift if constants are reordered or inserted. */
+  public int id() {
+    return this.ordinal();
+  }
+  /** Returns the price given in this constant's declaration. */
+  public BigDecimal price() {
+    return this.price;
+  }
+
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java
new file mode 100644
index 0000000..f41b604
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java
@@ -0,0 +1,29 @@
+package org.apache.bigtop.bigpetstore.generator.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public enum ProductType { // categories that Product constants are tagged with
+  DOG, CAT, TURTLE, FISH, DUCK;
+
+  private List<Product> products; // null until the first getProducts() call
+  /** Returns an unmodifiable list of the Product constants whose productType is this constant. */
+  public List<Product> getProducts() {
+    if(products == null) { // lazy fill; no locking is used here
+      generateProductList();
+    }
+    return products;
+  }
+  /** Collects the matching constants from Product.values() and stores them in the field. */
+  private void generateProductList() {
+    List<Product> products = new ArrayList<>(); // local list; shadows the field of the same name
+    for(Product p : Product.values()) {
+      if(p.productType == this) {
+        products.add(p);
+      }
+    }
+    this.products = Collections.unmodifiableList(products); // callers cannot modify the cached list
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java
new file mode 100644
index 0000000..f2b845a
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java
@@ -0,0 +1,26 @@
+package org.apache.bigtop.bigpetstore.generator.util;
+
+import java.util.Random;
+
+
+/**
+ * Each "state" has a pet store, with a certain "proportion" of the
+ * transactions.
+ */
+public enum State {
+  // Each state is associated with a relative probability (the values below sum to 1).
+  AZ(.1f),
+  AK(.1f),
+  CT(.1f),
+  OK(.1f),
+  CO(.1f),
+  CA(.3f),
+  NY(.2f);
+
+  public static Random rand = new Random(); // unseeded; not referenced inside this enum
+  public float probability; // weight passed to the constructor; public and non-final
+
+  private State(float probability) {
+    this.probability = probability;
+  }
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala
new file mode 100644
index 0000000..0ec240e
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala
@@ -0,0 +1,103 @@
+package org.apache.bigtop.bigpetstore.recommend
+
+import org.apache.mahout.cf.taste.hadoop.als.RecommenderJob
+import org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob
+import java.io.File
+import parquet.org.codehaus.jackson.map.DeserializerFactory.Config
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.conf.Configurable
+import org.apache.hadoop.util.ToolRunner
+import org.apache.mahout.cf.taste.hadoop.als.SharingMapper
+import org.apache.hadoop.util.Tool
+import org.apache.bigtop.bigpetstore.util.DeveloperTools
+
+// We don't need to wrap these two jobs in ToolRunner.run calls since the only
+// thing that we are doing right now is calling the run() methods of RecommenderJob
+// and ParallelALSFactorizationJob. Both of these classes have a main() method that
+// internally calls ToolRunner.run with all the command line args passed. So, if
+// we want to run this code from the command line, we can easily do so by running
+// the main methods of the ParallelALSFactorizationJob, followed by running the
+// main method of RecommenderJob. That would also avoid the multiple-JVM issue
+// mentioned below, so resetFlagInSharedAlsMapper would not be needed.
+class ItemRecommender(private val inputDir: String,
+        private val factorizationOutputDir: String,
+        private val recommendationsOutputDir: String) {
+  private val recommenderJob = new RecommenderJob
+  private val factorizationJob = new ParallelALSFactorizationJob
+
+  private def tempDir = "/tmp/mahout_" + System.currentTimeMillis
+
+  private def performAlsFactorization() = {
+    ToolRunner.run(factorizationJob, Array(
+        "--input", inputDir,
+        "--output", factorizationOutputDir,
+        "--lambda", "0.1",
+        "--tempDir", tempDir,
+        "--implicitFeedback", "false",
+        "--alpha", "0.8",
+        "--numFeatures", "2",
+        "--numIterations", "5",
+        "--numThreadsPerSolver", "1"))
+  }
+
+  private def generateRecommendations() = {
+    ToolRunner.run(recommenderJob, (Array(
+        "--input", factorizationOutputDir + "/userRatings/",
+        "--userFeatures", factorizationOutputDir + "/U/",
+        "--itemFeatures", factorizationOutputDir + "/M/",
+        "--numRecommendations", "1",
+        "--output", recommendationsOutputDir,
+        "--maxRating", "1")))
+  }
+
+  // At this point, performAlsFactorization and generateRecommendations
+  // cannot be run as-is from the same JVM instance. These two jobs
+  // share a common static variable which is not being handled correctly.
+  // This, unfortunately, results in a class-cast exception being thrown. That's
+  // why the resetFlagInSharedAlsMapper is required. See the comments on
+  // resetFlagInSharedAlsMapper() method.
+  def recommend = {
+    performAlsFactorization
+    resetFlagInSharedAlsMapper
+    generateRecommendations
+  }
+
+  // necessary for local execution in the same JVM only. If the performAlsFactorization()
+  // and generateRecommendations() calls are performed in separate JVM instances, this
+  // would be taken care of automatically. However, if we want to run these two methods
+  // as one task, we need to clean up the static state set by these methods, and we don't
+  // have any legitimate way of doing this directly. This clean-up should have been
+  // performed by ParallelALSFactorizationJob class after the job is finished.
+  // TODO: remove this when a better way comes along, or ParallelALSFactorizationJob
+  // takes responsibility.
+  private def resetFlagInSharedAlsMapper {
+    val m = classOf[SharingMapper[_, _, _, _, _]].getDeclaredMethod("reset");
+    m setAccessible true
+    m.invoke(null)
+  }
+}
+
+object ItemRecommender {
+  def main(args: Array[String]) {
+      val res = ToolRunner.run(new Configuration(), new Tool() {
+      var conf: Configuration = _;
+
+      override def setConf(conf: Configuration) {
+        this.conf=conf;
+      }
+
+
+      override def getConf() = {
+        this.conf;
+      }
+
+
+      override def run(toolArgs: Array[String]) = {
+        val ir = new ItemRecommender(toolArgs(0), toolArgs(1), toolArgs(2))
+        ir.recommend
+        0;
+      }
+    }, args);
+    System.exit(res);
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java
index 29f7c67..01a6b95 100755
--- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java
@@ -6,13 +6,13 @@
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- * 
+ *
  * Static final constants
  *
  * is useful to have the basic sql here as the HIVE SQL can vary between hive
@@ -24,13 +24,18 @@ package org.apache.bigtop.bigpetstore.util;
 public class BigPetStoreConstants {
 
    //Files should be stored in graphviz arch.dot
-   public enum OUTPUTS{
+   public static enum OUTPUTS {
         generated,//generator
         cleaned,//pig
+        tsv,
         pig_ad_hoc_script,
-        MAHOUT_CF_IN,//hive view over data for mahout
-        MAHOUT_CF_OUT,//mahout cf results
-        CUSTOMER_PAGE//crunchhh
+        CUSTOMER_PAGE; //crunchhh
+
+        public static enum MahoutPaths {
+          Mahout,
+          AlsFactorization,
+          AlsRecommendations
+        }
     };
 
 }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java
index 9fa9455..c652beb 100644
--- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java
+++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java
@@ -6,7 +6,7 @@
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,16 +16,14 @@
 
 package org.apache.bigtop.bigpetstore.util;
 
-import java.math.BigInteger;
-
-import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+import org.apache.bigtop.bigpetstore.generator.util.State;
 
 /**
  * User and Product IDs need numerical
  * identifiers for recommender algorithms
  * which attempt to interpolate new
  * products.
- * 
+ *
  * TODO: Delete this class. Its not necessarily required: We might just use HIVE HASH() as our
  * standard for this.
  */
@@ -34,7 +32,7 @@ public class NumericalIdUtils {
     /**
      * People: Leading with ordinal code for state.
      */
-    public static long toId(STATE state, String name){
+    public static long toId(State state, String name){
         String fromRawData =
                 state==null?
                         name:

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java
deleted file mode 100644
index a96fa44..0000000
--- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.bigtop.bigpetstore.util;
-
-import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory;
-
-import java.util.Comparator;
-
-@Deprecated
-public class Pair<S, T> implements Comparable<Pair<S, T>> {
-
-  private final S first;
-  private final T second;
-
-  public Pair(final S car, final T cdr) {
-    first = car;
-    second = cdr;
-  }
-
-  public S getFirst() { return first; }
-  public T getSecond() { return second; }
-
-  @Override
-  public boolean equals(Object o) {
-    if (null == o) {
-      return false;
-    } else if (o instanceof Pair) {
-      Pair<S, T> p = (Pair<S, T>) o;
-      if (first == null && second == null) {
-        return p.first == null && p.second == null;
-      } else if (first == null) {
-        return p.first == null && second.equals(p.second);
-      } else if (second == null) {
-        return p.second == null && first.equals(p.first);
-      } else {
-        return first.equals(p.first) && second.equals(p.second);
-      }
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public int hashCode() {
-    int code = 0;
-
-    if (null != first) {
-      code += first.hashCode();
-    }
-
-    if (null != second) {
-      code += second.hashCode() << 1;
-    }
-
-    return code;
-  }
-
-  @Override
-  public int compareTo(Pair<S, T> p) {
-    if (null == p) {
-      return 1;
-    }
-
-    Comparable<S> firstCompare = (Comparable<S>) first;
-
-    int firstResult = firstCompare.compareTo(p.first);
-    if (firstResult == 0) {
-      Comparable<T> secondCompare = (Comparable<T>) second;
-      return secondCompare.compareTo(p.second);
-    } else {
-      return firstResult;
-    }
-  }
-
-  // TODO: Can this be made static? Same with SecondElemComparator?
-  public class FirstElemComparator implements Comparator<Pair<S, T>> {
-    public FirstElemComparator() {
-    }
-
-    public int compare(Pair<S, T> p1, Pair<S, T> p2) {
-      Comparable<S> cS = (Comparable<S>) p1.first;
-      return cS.compareTo(p2.first);
-    }
-  }
-
-  public class SecondElemComparator implements Comparator<Pair<S, T>> {
-    public SecondElemComparator() {
-    }
-
-    public int compare(Pair<S, T> p1, Pair<S, T> p2) {
-      Comparable<T> cT = (Comparable<T>) p1.second;
-      return cT.compareTo(p2.second);
-    }
-  }
-
-  @Override
-  public String toString() {
-    String firstString = "null";
-    String secondString = "null";
-
-    if (null != first) {
-      firstString = first.toString();
-    }
-
-    if (null != second) {
-      secondString = second.toString();
-    }
-
-    return "(" + firstString + ", " + secondString + ")";
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala
new file mode 100644
index 0000000..de9b29b
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala
@@ -0,0 +1,263 @@
+package org.apache.bigtop.bigpetstore.generator
+
+import java.util.Random
+import org.jfairy.Fairy
+import java.util.Date
+
+
+/**
+ * Generic class for generating random data. This class was created so
+ * that we can provide a uniform API for getting random data. If we want,
+ * we can replace the underlying data-generation implementation using
+ * existing libraries.
+ */
+object DataForger {
+  private val random = new Random
+  private val fairy = Fairy.create()
+
+  // TODO: Jay / Bhashit : refactor to use a random data generator?
+  def firstName(random: Random) = firstNames(random.nextInt(firstNames.length))
+  def firstName: String = firstName(random)
+
+  // TODO: Jay / Bhashit : refactor to use a random data generator?
+  def lastName(random: Random) = lastNames(random.nextInt(lastNames.length))
+  def lastName: String = lastName(random)
+
+  def randomDateInPastYears(maxYearsEarlier: Int) = fairy.dateProducer().randomDateInThePast(maxYearsEarlier).toDate()
+
+  private val firstNames =  IndexedSeq("Aaron", "Abby", "Abigail", "Adam",
+          "Alan", "Albert", "Alex", "Alexandra", "Alexis", "Alice", "Alicia",
+          "Alisha", "Alissa", "Allen", "Allison", "Alyssa", "Amanda", "Amber",
+          "Amy", "Andrea", "Andrew", "Andy", "Angel", "Angela", "Angie",
+          "Anita", "Ann", "Anna", "Annette", "Anthony", "Antonio", "April",
+          "Arthur", "Ashley", "Audrey", "Austin", "Autumn", "Baby", "Barb",
+          "Barbara", "Becky", "Benjamin", "Beth", "Bethany", "Betty",
+          "Beverly", "Bill", "Billie", "Billy", "Blake", "Bob", "Bobbie",
+          "Bobby", "Bonnie", "Brad", "Bradley", "Brady", "Brandi", "Brandon",
+          "Brandy", "Breanna", "Brenda", "Brent", "Brett", "Brian", "Brianna",
+          "Brittany", "Brooke", "Brooklyn", "Bruce", "Bryan", "Caleb",
+          "Cameron", "Candy", "Carl", "Carla", "Carmen", "Carol", "Carolyn",
+          "Carrie", "Casey", "Cassandra", "Catherine", "Cathy", "Chad",
+          "Charlene", "Charles", "Charlie", "Charlotte", "Chase", "Chasity",
+          "Chastity", "Chelsea", "Cheryl", "Chester", "Cheyenne", "Chris",
+          "Christian", "Christina", "Christine", "Christoph", "Christopher",
+          "Christy", "Chuck", "Cindy", "Clara", "Clarence", "Clayton",
+          "Clifford", "Clint", "Cody", "Colton", "Connie", "Corey", "Cory",
+          "Courtney", "Craig", "Crystal", "Curtis", "Cynthia", "Dakota",
+          "Dale", "Dallas", "Dalton", "Dan", "Dana", "Daniel", "Danielle",
+          "Danny", "Darla", "Darlene", "Darrell", "Darren", "Dave", "David",
+          "Dawn", "Dean", "Deanna", "Debbie", "Deborah", "Debra", "Denise",
+          "Dennis", "Derek", "Derrick", "Destiny", "Devin", "Diana", "Diane",
+          "Dillon", "Dixie", "Dominic", "Don", "Donald", "Donna", "Donnie",
+          "Doris", "Dorothy", "Doug", "Douglas", "Drew", "Duane", "Dustin",
+          "Dusty", "Dylan", "Earl", "Ed", "Eddie", "Edward", "Elaine",
+          "Elizabeth", "Ellen", "Emily", "Eric", "Erica", "Erika", "Erin",
+          "Ernest", "Ethan", "Eugene", "Eva", "Evelyn", "Everett", "Faith",
+          "Father", "Felicia", "Floyd", "Francis", "Frank", "Fred", "Gabriel",
+          "Gage", "Gail", "Gary", "Gene", "George", "Gerald", "Gina", "Ginger",
+          "Glen", "Glenn", "Gloria", "Grace", "Greg", "Gregory", "Haley",
+          "Hannah", "Harley", "Harold", "Harry", "Heath", "Heather", "Heidi",
+          "Helen", "Herbert", "Holly", "Hope", "Howard", "Hunter", "Ian",
+          "Isaac", "Jack", "Jackie", "Jacob", "Jade", "Jake", "James", "Jamie",
+          "Jan", "Jane", "Janet", "Janice", "Jared", "Jasmine", "Jason", "Jay",
+          "Jean", "Jeannie", "Jeff", "Jeffery", "Jeffrey", "Jenna", "Jennifer",
+          "Jenny", "Jeremiah", "Jeremy", "Jerry", "Jesse", "Jessica", "Jessie",
+          "Jill", "Jim", "Jimmy", "Joann", "Joanne", "Jodi", "Jody", "Joe",
+          "Joel", "Joey", "John", "Johnathan", "Johnny", "Jon", "Jonathan",
+          "Jonathon", "Jordan", "Joseph", "Josh", "Joshua", "Joyce", "Juanita",
+          "Judy", "Julia", "Julie", "Justin", "Kaitlyn", "Karen", "Katelyn",
+          "Katherine", "Kathleen", "Kathryn", "Kathy", "Katie", "Katrina",
+          "Kay", "Kayla", "Kaylee", "Keith", "Kelly", "Kelsey", "Ken",
+          "Kendra", "Kenneth", "Kenny", "Kevin", "Kim", "Kimberly", "Kris",
+          "Krista", "Kristen", "Kristin", "Kristina", "Kristy", "Kyle",
+          "Kylie", "Lacey", "Laken", "Lance", "Larry", "Laura", "Lawrence",
+          "Leah", "Lee", "Leonard", "Leroy", "Leslie", "Levi", "Lewis",
+          "Linda", "Lindsay", "Lindsey", "Lisa", "Lloyd", "Logan", "Lois",
+          "Loretta", "Lori", "Louis", "Lynn", "Madison", "Mandy", "Marcus",
+          "Margaret", "Maria", "Mariah", "Marie", "Marilyn", "Marion", "Mark",
+          "Marlene", "Marsha", "Martha", "Martin", "Marty", "Marvin", "Mary",
+          "Mary ann", "Mason", "Matt", "Matthew", "Max", "Megan", "Melanie",
+          "Melinda", "Melissa", "Melody", "Michael", "Michelle", "Mickey",
+          "Mike", "Mindy", "Miranda", "Misty", "Mitchell", "Molly", "Monica",
+          "Morgan", "Mother", "Myron", "Nancy", "Natasha", "Nathan",
+          "Nicholas", "Nick", "Nicole", "Nina", "Noah", "Norma", "Norman",
+          "Olivia", "Paige", "Pam", "Pamela", "Pat", "Patricia", "Patrick",
+          "Patty", "Paul", "Paula", "Peggy", "Penny", "Pete", "Phillip",
+          "Phyllis", "Rachael", "Rachel", "Ralph", "Randall", "Randi", "Randy",
+          "Ray", "Raymond", "Rebecca", "Regina", "Renee", "Rex", "Rhonda",
+          "Richard", "Rick", "Ricky", "Rita", "Rob", "Robbie", "Robert",
+          "Roberta", "Robin", "Rochelle", "Rocky", "Rod", "Rodney", "Roger",
+          "Ron", "Ronald", "Ronda", "Ronnie", "Rose", "Roxanne", "Roy", "Russ",
+          "Russell", "Rusty", "Ruth", "Ryan", "Sabrina", "Sally", "Sam",
+          "Samantha", "Samuel", "Sandra", "Sandy", "Sara", "Sarah", "Savannah",
+          "Scott", "Sean", "Seth", "Shanda", "Shane", "Shanna", "Shannon",
+          "Sharon", "Shaun", "Shawn", "Shawna", "Sheila", "Shelly", "Sher",
+          "Sherri", "Sherry", "Shirley", "Sierra", "Skyler", "Stacey", "Stacy",
+          "Stanley", "Stephanie", "Stephen", "Steve", "Steven", "Sue",
+          "Summer", "Susan", "Sydney", "Tabatha", "Tabitha", "Tamara", "Tammy",
+          "Tara", "Tasha", "Tashia", "Taylor", "Ted", "Teresa", "Terri",
+          "Terry", "Tessa", "Thelma", "Theresa", "Thomas", "Tia", "Tiffany",
+          "Tim", "Timmy", "Timothy", "Tina", "Todd", "Tom", "Tommy", "Toni",
+          "Tony", "Tonya", "Tracey", "Tracie", "Tracy", "Travis", "Trent",
+          "Trevor", "Trey", "Trisha", "Tristan", "Troy", "Tyler", "Tyrone",
+          "Unborn", "Valerie", "Vanessa", "Vernon", "Veronica", "Vicki",
+          "Vickie", "Vicky", "Victor", "Victoria", "Vincent", "Virginia",
+          "Vivian", "Walter", "Wanda", "Wayne", "Wendy", "Wesley", "Whitney",
+          "William", "Willie", "Wyatt", "Zachary")
+
+  private val lastNames = IndexedSeq("Abbott", "Acevedo", "Acosta", "Adams",
+          "Adkins", "Aguilar", "Aguirre", "Albert", "Alexander", "Alford",
+          "Allen", "Allison", "Alston", "Alvarado", "Alvarez", "Anderson",
+          "Andrews", "Anthony", "Armstrong", "Arnold", "Ashley", "Atkins",
+          "Atkinson", "Austin", "Avery", "Avila", "Ayala", "Ayers", "Bailey",
+          "Baird", "Baker", "Baldwin", "Ball", "Ballard", "Banks", "Barber",
+          "Smith", "Johnson", "Williams", "Jones", "Brown", "Davis", "Miller",
+          "Wilson", "Moore", "Taylor", "Thomas", "Jackson", "Barker", "Barlow",
+          "Barnes", "Barnett", "Barr", "Barrera", "Barrett", "Barron", "Barry",
+          "Bartlett", "Barton", "Bass", "Bates", "Battle", "Bauer", "Baxter",
+          "Beach", "Bean", "Beard", "Beasley", "Beck", "Becker", "Bell",
+          "Bender", "Benjamin", "Bennett", "Benson", "Bentley", "Benton",
+          "Berg", "Berger", "Bernard", "Berry", "Best", "Bird", "Bishop",
+          "Black", "Blackburn", "Blackwell", "Blair", "Blake", "Blanchard",
+          "Blankenship", "Blevins", "Bolton", "Bond", "Bonner", "Booker",
+          "Boone", "Booth", "Bowen", "Bowers", "Bowman", "Boyd", "Boyer",
+          "Boyle", "Bradford", "Bradley", "Bradshaw", "Brady", "Branch",
+          "Bray", "Brennan", "Brewer", "Bridges", "Briggs", "Bright", "Britt",
+          "Brock", "Brooks", "Browning", "Bruce", "Bryan", "Bryant",
+          "Buchanan", "Buck", "Buckley", "Buckner", "Bullock", "Burch",
+          "Burgess", "Burke", "Burks", "Burnett", "Burns", "Burris", "Burt",
+          "Burton", "Bush", "Butler", "Byers", "Byrd", "Cabrera", "Cain",
+          "Calderon", "Caldwell", "Calhoun", "Callahan", "Camacho", "Cameron",
+          "Campbell", "Campos", "Cannon", "Cantrell", "Cantu", "Cardenas",
+          "Carey", "Carlson", "Carney", "Carpenter", "Carr", "Carrillo",
+          "Carroll", "Carson", "Carter", "Carver", "Case", "Casey", "Cash",
+          "Castaneda", "Castillo", "Castro", "Cervantes", "Chambers", "Chan",
+          "Chandler", "Chaney", "Chang", "Chapman", "Charles", "Chase",
+          "Chavez", "Chen", "Cherry", "Christensen", "Christian", "Church",
+          "Clark", "Clarke", "Clay", "Clayton", "Clements", "Clemons",
+          "Cleveland", "Cline", "Cobb", "Cochran", "Coffey", "Cohen", "Cole",
+          "Coleman", "Collier", "Collins", "Colon", "Combs", "Compton",
+          "Conley", "Conner", "Conrad", "Contreras", "Conway", "Cook", "Cooke",
+          "Cooley", "Cooper", "Copeland", "Cortez", "Cote", "Cotton", "Cox",
+          "Craft", "Craig", "Crane", "Crawford", "Crosby", "Cross", "Cruz",
+          "Cummings", "Cunningham", "Curry", "Curtis", "Dale", "Dalton",
+          "Daniel", "Daniels", "Daugherty", "Davenport", "David", "Davidson",
+          "Dawson", "Day", "Dean", "Decker", "Dejesus", "Delacruz", "Delaney",
+          "Deleon", "Delgado", "Dennis", "Diaz", "Dickerson", "Dickinson",
+          "Dillard", "Dillon", "Dixon", "Dodson", "Dominguez", "Donaldson",
+          "Donovan", "Dorsey", "Dotson", "Douglas", "Downs", "Doyle", "Drake",
+          "Dudley", "Duffy", "Duke", "Duncan", "Dunlap", "Dunn", "Duran",
+          "Durham", "Dyer", "Eaton", "Edwards", "Elliott", "Ellis", "Ellison",
+          "Emerson", "England", "English", "Erickson", "Espinoza", "Estes",
+          "Estrada", "Evans", "Everett", "Ewing", "Farley", "Farmer",
+          "Farrell", "Faulkner", "Ferguson", "Fernandez", "Ferrell", "Fields",
+          "Figueroa", "Finch", "Finley", "Fischer", "Fisher", "Fitzgerald",
+          "Fitzpatrick", "Fleming", "Fletcher", "Flores", "Flowers", "Floyd",
+          "Flynn", "Foley", "Forbes", "Ford", "Foreman", "Foster", "Fowler",
+          "Fox", "Francis", "Franco", "Frank", "Franklin", "Franks", "Frazier",
+          "Frederick", "Freeman", "French", "Frost", "Fry", "Frye", "Fuentes",
+          "Fuller", "Fulton", "Gaines", "Gallagher", "Gallegos", "Galloway",
+          "Gamble", "Garcia", "Gardner", "Garner", "Garrett", "Garrison",
+          "Garza", "Gates", "Gay", "Gentry", "George", "Gibbs", "Gibson",
+          "Gilbert", "Giles", "Gill", "Gillespie", "Gilliam", "Gilmore",
+          "Glass", "Glenn", "Glover", "Goff", "Golden", "Gomez", "Gonzales",
+          "Gonzalez", "Good", "Goodman", "Goodwin", "Gordon", "Gould",
+          "Graham", "Grant", "Graves", "Gray", "Green", "Greene", "Greer",
+          "Gregory", "Griffin", "Griffith", "Grimes", "Gross", "Guerra",
+          "Guerrero", "Guthrie", "Gutierrez", "Guy", "Guzman", "Hahn", "Hale",
+          "Haley", "Hall", "Hamilton", "Hammond", "Hampton", "Hancock",
+          "Haney", "Hansen", "Hanson", "Hardin", "Harding", "Hardy", "Harmon",
+          "Harper", "Harris", "Harrington", "Harrison", "Hart", "Hartman",
+          "Harvey", "Hatfield", "Hawkins", "Hayden", "Hayes", "Haynes", "Hays",
+          "Head", "Heath", "Hebert", "Henderson", "Hendricks", "Hendrix",
+          "Henry", "Hensley", "Henson", "Herman", "Hernandez", "Herrera",
+          "Herring", "Hess", "Hester", "Hewitt", "Hickman", "Hicks", "Higgins",
+          "Hill", "Hines", "Hinton", "Hobbs", "Hodge", "Hodges", "Hoffman",
+          "Hogan", "Holcomb", "Holden", "Holder", "Holland", "Holloway",
+          "Holman", "Holmes", "Holt", "Hood", "Hooper", "Hoover", "Hopkins",
+          "Hopper", "Horn", "Horne", "Horton", "House", "Houston", "Howard",
+          "Howe", "Howell", "Hubbard", "Huber", "Hudson", "Huff", "Huffman",
+          "Hughes", "Hull", "Humphrey", "Hunt", "Hunter", "Hurley", "Hurst",
+          "Hutchinson", "Hyde", "Ingram", "Irwin", "Jacobs", "Jacobson",
+          "James", "Jarvis", "Jefferson", "Jenkins", "Jennings", "Jensen",
+          "Jimenez", "Johns", "Johnston", "Jordan", "Joseph", "Joyce",
+          "Joyner", "Juarez", "Justice", "Kane", "Kaufman", "Keith", "Keller",
+          "Kelley", "Kelly", "Kemp", "Kennedy", "Kent", "Kerr", "Key", "Kidd",
+          "Kim", "King", "Kinney", "Kirby", "Kirk", "Kirkland", "Klein",
+          "Kline", "Knapp", "Knight", "Knowles", "Knox", "Koch", "Kramer",
+          "Lamb", "Lambert", "Lancaster", "Landry", "Lane", "Lang", "Langley",
+          "Lara", "Larsen", "Larson", "Lawrence", "Lawson", "Le", "Leach",
+          "Leblanc", "Lee", "Leon", "Leonard", "Lester", "Levine", "Levy",
+          "Lewis", "Lindsay", "Lindsey", "Little", "Livingston", "Lloyd",
+          "Logan", "Long", "Lopez", "Lott", "Love", "Lowe", "Lowery", "Lucas",
+          "Luna", "Lynch", "Lynn", "Lyons", "Macdonald", "Macias", "Mack",
+          "Madden", "Maddox", "Maldonado", "Malone", "Mann", "Manning",
+          "Marks", "Marquez", "Marsh", "Marshall", "Martin", "Martinez",
+          "Mason", "Massey", "Mathews", "Mathis", "Matthews", "Maxwell", "May",
+          "Mayer", "Maynard", "Mayo", "Mays", "McBride", "McCall", "McCarthy",
+          "McCarty", "McClain", "McClure", "McConnell", "McCormick", "McCoy",
+          "McCray", "McCullough", "McDaniel", "McDonald", "McDowell",
+          "McFadden", "McFarland", "McGee", "McGowan", "McGuire", "McIntosh",
+          "McIntyre", "McKay", "McKee", "McKenzie", "McKinney", "McKnight",
+          "McLaughlin", "McLean", "McLeod", "McMahon", "McMillan", "McNeil",
+          "McPherson", "Meadows", "Medina", "Mejia", "Melendez", "Melton",
+          "Mendez", "Mendoza", "Mercado", "Mercer", "Merrill", "Merritt",
+          "Meyer", "Meyers", "Michael", "Middleton", "Miles", "Mills",
+          "Miranda", "Mitchell", "Molina", "Monroe", "Montgomery", "Montoya",
+          "Moody", "Moon", "Mooney", "Morales", "Moran", "Moreno", "Morgan",
+          "Morin", "Morris", "Morrison", "Morrow", "Morse", "Morton", "Moses",
+          "Mosley", "Moss", "Mueller", "Mullen", "Mullins", "Munoz", "Murphy",
+          "Murray", "Myers", "Nash", "Navarro", "Neal", "Nelson", "Newman",
+          "Newton", "Nguyen", "Nichols", "Nicholson", "Nielsen", "Nieves",
+          "Nixon", "Noble", "Noel", "Nolan", "Norman", "Norris", "Norton",
+          "Nunez", "Obrien", "Ochoa", "Oconnor", "Odom", "Odonnell", "Oliver",
+          "Olsen", "Olson", "O'neal", "O'neil", "O'neill", "Orr", "Ortega",
+          "Ortiz", "Osborn", "Osborne", "Owen", "Owens", "Pace", "Pacheco",
+          "Padilla", "Page", "Palmer", "Park", "Parker", "Parks", "Parrish",
+          "Parsons", "Pate", "Patel", "Patrick", "Patterson", "Patton", "Paul",
+          "Payne", "Pearson", "Peck", "Pena", "Pennington", "Perez", "Perkins",
+          "Perry", "Peters", "Petersen", "Peterson", "Petty", "Phelps",
+          "Phillips", "Pickett", "Pierce", "Pittman", "Pitts", "Pollard",
+          "Poole", "Pope", "Porter", "Potter", "Potts", "Powell", "Powers",
+          "Pratt", "Preston", "Price", "Prince", "Pruitt", "Puckett", "Pugh",
+          "Quinn", "Ramirez", "Ramos", "Ramsey", "Randall", "Randolph",
+          "Rasmussen", "Ratliff", "Ray", "Raymond", "Reed", "Reese", "Reeves",
+          "Reid", "Reilly", "Reyes", "Reynolds", "Rhodes", "Rice", "Rich",
+          "Richard", "Richards", "Richardson", "Richmond", "Riddle", "Riggs",
+          "Riley", "Rios", "Rivas", "Rivera", "Rivers", "Roach", "Robbins",
+          "Roberson", "Roberts", "Robertson", "Robinson", "Robles", "Rocha",
+          "Rodgers", "Rodriguez", "Rodriquez", "Rogers", "Rojas", "Rollins",
+          "Roman", "Romero", "Rosa", "Rosales", "Rosario", "Rose", "Ross",
+          "Roth", "Rowe", "Rowland", "Roy", "Ruiz", "Rush", "Russell", "Russo",
+          "Rutledge", "Ryan", "Salas", "Salazar", "Salinas", "Sampson",
+          "Sanchez", "Sanders", "Sandoval", "Sanford", "Santana", "Santiago",
+          "Santos", "Sargent", "Saunders", "Savage", "Sawyer", "Schmidt",
+          "Schneider", "Schroeder", "Schultz", "Schwartz", "Scott", "Sears",
+          "Sellers", "Serrano", "Sexton", "Shaffer", "Shannon", "Sharp",
+          "Sharpe", "Shaw", "Shelton", "Shepard", "Shepherd", "Sheppard",
+          "Sherman", "Shields", "Short", "Silva", "Simmons", "Simon",
+          "Simpson", "Sims", "Singleton", "Skinner", "Slater", "Sloan",
+          "Small", "Snider", "Snow", "Snyder", "Solis", "Solomon", "Sosa",
+          "Soto", "Sparks", "Spears", "Spence", "Spencer", "Stafford",
+          "Stanley", "Stanton", "Stark", "Steele", "Stein", "Stephens",
+          "Stephenson", "Stevens", "Stevenson", "Stewart", "Stokes", "Stone",
+          "Stout", "Strickland", "Strong", "Stuart", "Suarez", "Sullivan",
+          "Summers", "Sutton", "Swanson", "Sweeney", "Sweet", "Sykes",
+          "Talley", "Tanner", "Tate", "Terrell", "Terry", "Thompson",
+          "Thornton", "Tillman", "Todd", "Torres", "Townsend", "Tran",
+          "Travis", "Trevino", "Trujillo", "Tucker", "Turner", "Tyler",
+          "Tyson", "Underwood", "Valdez", "Valencia", "Valentine",
+          "Valenzuela", "Vance", "Vang", "Vargas", "Vasquez", "Vaughan",
+          "Vaughn", "Vazquez", "Vega", "Velasquez", "Velazquez", "Velez",
+          "Van halen", "Vincent", "Vinson", "Wade", "Wagner", "Walker", "Wall",
+          "Wallace", "Waller", "Walls", "Walsh", "Walter", "Walters", "Walton",
+          "Ward", "Ware", "Warner", "Warren", "Washington", "Waters",
+          "Watkins", "Watson", "Watts", "Weaver", "Webb", "Weber", "Webster",
+          "Weeks", "Weiss", "Welch", "Wells", "West", "Wheeler", "Whitaker",
+          "White", "Whitehead", "Whitfield", "Whitley", "Whitney", "Wiggins",
+          "Wilcox", "Wilder", "Wiley", "Wilkerson", "Wilkins", "Wilkinson",
+          "William", "Williamson", "Willis", "Winters", "Wise", "Witt", "Wolf",
+          "Wolfe", "Wong", "Wood", "Woodard", "Woods", "Woodward", "Wooten",
+          "Workman", "Wright", "Wyatt", "Wynn", "Yang", "Yates", "York",
+          "Young", "Zamora", "Zimmerman")
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala
new file mode 100644
index 0000000..9e70cca
--- /dev/null
+++ b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.bigpetstore.generator;
+
+import java.util.Date
+import org.apache.bigtop.bigpetstore.generator.util.State
+import org.apache.commons.lang3.StringUtils
+import java.util.Arrays.asList
+import java.util.Random
+import scala.collection.Iterator
+import com.sun.org.apache.xml.internal.serializer.ToStream
+import java.util.{Iterator => JavaIterator}
+import scala.collection.JavaConversions.asJavaIterator
+import org.apache.bigtop.bigpetstore.generator.util.Product
+import org.apache.commons.lang3.Range;
+import org.apache.bigtop.bigpetstore.generator.util.ProductType
+
+/**
+ * Factory for the synthetic transaction records of one store `state`. Over time
+ * the generator is meant to embed bias (e.g. certain products <--> certain years
+ * or days) that clustering/classifiers can tease out again.
+ * NOTE(review): the seed is state.hashCode; confirm it is stable across JVM runs.
+ */
+class TransactionIteratorFactory(private val records: Int,
+        private val customerIdRange: Range[java.lang.Long],
+        private val state: State) {
+  assert(records > 0, "Number of records must be greater than 0 to generate a data iterator!")
+  private val seededRandom = new Random(state.hashCode)
+
+  /** New record iterator, exposed as a java.util.Iterator; it shares this factory's Random. */
+  def data: JavaIterator[TransactionIteratorFactory.KeyVal[String, String]] =
+    new TransactionIteratorFactory.DataIterator(records, customerIdRange, state, seededRandom)
+}
+
+object TransactionIteratorFactory {
+  /** Simple key/value holder emitted for every generated transaction. */
+  class KeyVal[K, V](val key: K, val value: V)
+
+  /** Yields at most `records` transactions; customer ids stay inside the inclusive
+   *  `customerIdRange`. Names, repeat counts and products are drawn from `r`. */
+  private class DataIterator(records: Int,
+          customerIdRange: Range[java.lang.Long],
+          state: State,
+          r: Random) extends Iterator[KeyVal[String, String]] {
+    private var firstName: String = null
+    private var lastName: String = null
+    private var elementsProducedCount = 0
+    private var repeatCount = 0
+    private var customerSelected = false
+    private var currentCustomerId = customerIdRange.getMinimum
+    private var currentProductType = selectRandomProductType
+
+    // True while the current customer has repeats left or another in-range id can be issued.
+    def hasNext = elementsProducedCount < records && (repeatCount > 0 ||
+      !customerSelected || currentCustomerId < customerIdRange.getMaximum)
+
+    def next(): TransactionIteratorFactory.KeyVal[String, String] = {
+      if (!hasNext) throw new NoSuchElementException("No more transactions to generate")
+      val date = DataForger.randomDateInPastYears(50)
+      setIteratorState()
+      val product = randomProductOfCurrentlySelectedType
+      val key = StringUtils.join(asList("BigPetStore", "storeCode_" + state.name(),
+              elementsProducedCount.toString), ",")
+      val value = StringUtils.join(asList(currentCustomerId, firstName, lastName, product.id,
+              product.name.toLowerCase, product.price, date), ",")
+      elementsProducedCount += 1
+      new TransactionIteratorFactory.KeyVal(key, value)
+    }
+
+    /** Some customers come back for more :) Replays the current customer or picks a new one. */
+    private def setIteratorState() = {
+      if (repeatCount > 0) {
+        repeatCount -= 1
+      } else {
+        if (customerSelected) currentCustomerId += 1 // first customer keeps the range minimum
+        customerSelected = true
+        firstName = DataForger.firstName(r)
+        lastName = DataForger.lastName(r)
+        // Unbounded Gaussian draw: may exceed 10; a non-positive value simply means no repeats.
+        repeatCount = (r.nextGaussian * 4f).toInt
+        currentProductType = selectRandomProductType
+      }
+    }
+
+    private def selectRandomProductType =
+      ProductType.values.apply(r.nextInt(ProductType.values.length))
+
+    private def randomProductOfCurrentlySelectedType =
+      currentProductType.getProducts.get(r.nextInt(currentProductType.getProducts.size))
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java
index 52b8079..e2f1f25 100644
--- a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java
+++ b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java
@@ -6,7 +6,7 @@
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,7 +17,7 @@ package org.apache.bigtop.bigpetstore.generator;
 
 import static org.junit.Assert.assertFalse;
 
-import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+import org.apache.bigtop.bigpetstore.generator.util.State;
 import org.apache.bigtop.bigpetstore.util.NumericalIdUtils;
 import org.junit.Test;
 
@@ -25,9 +25,9 @@ public class TestNumericalIdUtils {
 
     @Test
     public void testName() {
-        String strId= STATE.OK.name()+"_"+ "jay vyas";
+        String strId= State.OK.name()+"_"+ "jay vyas";
         long id = NumericalIdUtils.toId(strId);
-        String strId2= STATE.CO.name()+"_"+ "jay vyas";
+        String strId2= State.CO.name()+"_"+ "jay vyas";
         long id2 = NumericalIdUtils.toId(strId2);
         System.out.println(id + " " + id2);
         assertFalse(id==id2);

http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java
----------------------------------------------------------------------
diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java
index d68e36c..76de3d0 100755
--- a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java
+++ b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java
@@ -6,7 +6,7 @@
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -24,7 +24,7 @@ import java.io.InputStreamReader;
 import java.util.Date;
 
 import org.apache.bigtop.bigpetstore.generator.BPSGenerator.props;
-import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE;
+import org.apache.bigtop.bigpetstore.generator.util.State;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -61,7 +61,7 @@ public class TestPetStoreTransactionGeneratorJob {
          * Run the job
          */
         Path output = new Path("petstoredata/" + (new Date()).toString());
-        Job createInput = BPSGenerator.createJob(output, c);
+        Job createInput = BPSGenerator.getCreateTransactionRecordsJob(output, c);
         createInput.submit();
         System.out.println(createInput);
         createInput.waitForCompletion(true);
@@ -83,10 +83,10 @@ public class TestPetStoreTransactionGeneratorJob {
             s = br.readLine();
             System.out.println("===>" + s);
             recordsSeen++;
-            if (s.contains(STATE.CT.name())) {
+            if (s.contains(State.CT.name())) {
                 CTseen = true;
             }
-            if (s.contains(STATE.AZ.name())) {
+            if (s.contains(State.AZ.name())) {
                 AZseen = true;
             }
         }


Mime
View raw message