hadoop-common-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Stefan Will <stefan.w...@gmx.net>
Subject Re: hadoop MapReduce and stop words
Date Sat, 16 May 2009 20:34:56 GMT
Just use a java.util.HashSet for this. There should only be a few dozen
stopwords, so load them into a HashSet when the Mapper starts up, and then
check your tokens against it while you're processing records.

-- Stefan


> From: tim robertson <timrobertson100@gmail.com>
> Reply-To: <core-user@hadoop.apache.org>
> Date: Sat, 16 May 2009 15:48:23 +0200
> To: <core-user@hadoop.apache.org>
> Subject: Re: hadoop MapReduce and stop words
> 
> Try and google "binary tree java" and you will get loads of hits...
> 
> This is a simple implementation but I am sure there are better ones
> that handle balancing better.
> 
> Cheers
> Tim
> 
> 
> public class BinaryTree {
> 
> public static void main(String[] args) {
> BinaryTree bt = new BinaryTree();
> 
> for (int i = 0; i < 10000; i++) {
> bt.insert("" + i);
> }
> 
> System.out.println(bt.lookup("999"));
> System.out.println(bt.lookup("100"));
> System.out.println(bt.lookup("a")); // should be null
> }
> 
> private Node root;
> 
> private static class Node {
> Node left;
> Node right;
> String value;
> 
> public Node(String value) {
> this.value = value;
> }
> }
> 
> public String lookup(String key) {
> return (lookup(root, key));
> }
> 
> private String lookup(Node node, String value) {
> if (node == null) {
> return (null);
> }
> 
> if (value.equals(node.value)) {
> return (node.value);
> } else if (value.compareTo(node.value) < 0) {
> return (lookup(node.left, value));
> } else {
> return (lookup(node.right, value));
> }
> }
> 
> public void insert(String value) {
> root = insert(root, value);
> }
> 
> private Node insert(Node node, String value) {
> if (node == null) {
> node = new Node(value);
> } else {
> if (value.compareTo(node.value) <= 0) {
> node.left = insert(node.left, value);
> } else {
> node.right = insert(node.right, value);
> }
> }
> 
> return (node);
> }
> }



Mime
View raw message