commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From brit...@apache.org
Subject [2/6] [text] Move classes from the internal package into the package where they are used and make them package private.
Date Sun, 19 Apr 2015 09:02:33 GMT
Move classes from the internal package into the package where they
are used and make them package private.


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/df681238
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/df681238
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/df681238

Branch: refs/heads/master
Commit: df681238bf5bcb2fece950b644a7d00a712d0cc8
Parents: 75db6de
Author: Benedikt Ritter <britter@apache.org>
Authored: Sun Apr 19 10:32:13 2015 +0200
Committer: Benedikt Ritter <britter@apache.org>
Committed: Sun Apr 19 10:37:50 2015 +0200

----------------------------------------------------------------------
 .../commons/text/similarity/CosineDistance.java |  6 --
 .../apache/commons/text/similarity/Counter.java | 60 ++++++++++++++++++++
 .../commons/text/similarity/RegexTokenizer.java | 50 ++++++++++++++++
 .../commons/text/similarity/Tokenizer.java      | 34 +++++++++++
 .../text/similarity/internal/Counter.java       | 60 --------------------
 .../similarity/internal/RegexTokenizer.java     | 50 ----------------
 .../text/similarity/internal/Tokenizer.java     | 34 -----------
 .../text/similarity/internal/package-info.java  | 23 --------
 .../commons/text/similarity/package-info.java   |  2 +-
 9 files changed, 145 insertions(+), 174 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
index 2fa4515..c5e8853 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
@@ -18,17 +18,11 @@ package org.apache.commons.text.similarity;
 
 import java.util.Map;
 
-import org.apache.commons.text.similarity.internal.Counter;
-import org.apache.commons.text.similarity.internal.RegexTokenizer;
-import org.apache.commons.text.similarity.internal.Tokenizer;
-
 /**
  * Measures the cosine distance between two character sequences.
  *
  * <p>It utilizes the CosineSimilarity to compute the distance. Character sequences
  * are converted into vectors through a simple tokenizer that works with </p>
- *
- * @see org.apache.commons.text.similarity.internal.RegexTokenizer
  */
 public class CosineDistance implements EditDistance<Double> {
     /**

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/Counter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/Counter.java b/src/main/java/org/apache/commons/text/similarity/Counter.java
new file mode 100644
index 0000000..5eefc51
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/Counter.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Java implementation of Python's collections Counter module.
+ *
+ * <p>It counts how many times each element provided occurred in an array and
+ * returns a dict with the element as key and the count as value.</p>
+ *
+ * @see <a href="https://docs.python.org/dev/library/collections.html#collections.Counter">
+ * https://docs.python.org/dev/library/collections.html#collections.Counter</a>
+ */
+final class Counter {
+
+    /**
+     * Hidden constructor.
+     */
+    private Counter() {
+        super();
+    }
+
+    /**
+     * It counts how many times each element provided occurred in an array and
+     * returns a dict with the element as key and the count as value.
+     *
+     * @param tokens array of tokens
+     * @return dict, where the elements are key, and the count the value
+     */
+    public static Map<CharSequence, Integer> of(CharSequence[] tokens) {
+        final Map<CharSequence, Integer> innerCounter = new HashMap<CharSequence, Integer>();
+        for (CharSequence token : tokens) {
+            if (innerCounter.containsKey(token)) {
+                int value = innerCounter.get(token);
+                innerCounter.put(token, ++value);
+            } else {
+                innerCounter.put(token, 1);
+            }
+        }
+        return innerCounter;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java b/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java
new file mode 100644
index 0000000..5a6c5d3
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A simple word tokenizer that utilizes regex to find words. It applies a regex
+ * {@code}(\w)+{@code} over the input text to extract words from a given character
+ * sequence.
+ */
+class RegexTokenizer implements Tokenizer<CharSequence> {
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws IllegalArgumentException if the input text is blank
+     */
+    @Override
+    public CharSequence[] tokenize(CharSequence text) {
+        if (text == null || text.toString().trim().equals("")) {
+            throw new IllegalArgumentException("Invalid text");
+        }
+        Pattern pattern = Pattern.compile("(\\w)+");
+        Matcher matcher = pattern.matcher(text.toString());
+        List<String> tokens = new ArrayList<String>();
+        while (matcher.find()) {
+            tokens.add(matcher.group(0));
+        }
+        return tokens.toArray(new String[0]);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/Tokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/Tokenizer.java b/src/main/java/org/apache/commons/text/similarity/Tokenizer.java
new file mode 100644
index 0000000..0a69d24
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/Tokenizer.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+/**
+ * A tokenizer. Can produce arrays of tokens from a given type.
+ *
+ * @param <T> given type
+ */
+interface Tokenizer<T> {
+
+    /**
+     * Returns an array of tokens.
+     *
+     * @param text input text
+     * @return array of tokens
+     */
+    T[] tokenize(CharSequence text);
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/internal/Counter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/internal/Counter.java b/src/main/java/org/apache/commons/text/similarity/internal/Counter.java
deleted file mode 100644
index c0dd2e6..0000000
--- a/src/main/java/org/apache/commons/text/similarity/internal/Counter.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity.internal;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Java implementation of Python's collections Counter module.
- *
- * <p>It counts how many times each element provided occurred in an array and
- * returns a dict with the element as key and the count as value.</p>
- *
- * @see <a href="https://docs.python.org/dev/library/collections.html#collections.Counter">
- * https://docs.python.org/dev/library/collections.html#collections.Counter</a>
- */
-public final class Counter {
-
-    /**
-     * Hidden constructor.
-     */
-    private Counter() {
-        super();
-    }
-
-    /**
-     * It counts how many times each element provided occurred in an array and
-     * returns a dict with the element as key and the count as value.
-     *
-     * @param tokens array of tokens
-     * @return dict, where the elements are key, and the count the value
-     */
-    public static Map<CharSequence, Integer> of(CharSequence[] tokens) {
-        final Map<CharSequence, Integer> innerCounter = new HashMap<CharSequence, Integer>();
-        for (CharSequence token : tokens) {
-            if (innerCounter.containsKey(token)) {
-                int value = innerCounter.get(token);
-                innerCounter.put(token, ++value);
-            } else {
-                innerCounter.put(token, 1);
-            }
-        }
-        return innerCounter;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/internal/RegexTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/internal/RegexTokenizer.java b/src/main/java/org/apache/commons/text/similarity/internal/RegexTokenizer.java
deleted file mode 100644
index 082ac05..0000000
--- a/src/main/java/org/apache/commons/text/similarity/internal/RegexTokenizer.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity.internal;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * A simple word tokenizer that utilizes regex to find words. It applies a regex
- * {@code}(\w)+{@code} over the input text to extract words from a given character
- * sequence.
- */
-public class RegexTokenizer implements Tokenizer<CharSequence> {
-
-    /**
-     * {@inheritDoc}
-     *
-     * @throws IllegalArgumentException if the input text is blank
-     */
-    @Override
-    public CharSequence[] tokenize(CharSequence text) {
-        if (text == null || text.toString().trim().equals("")) {
-            throw new IllegalArgumentException("Invalid text");
-        }
-        Pattern pattern = Pattern.compile("(\\w)+");
-        Matcher matcher = pattern.matcher(text.toString());
-        List<String> tokens = new ArrayList<String>();
-        while (matcher.find()) {
-            tokens.add(matcher.group(0));
-        }
-        return tokens.toArray(new String[0]);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java b/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java
deleted file mode 100644
index 9dc63e4..0000000
--- a/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity.internal;
-
-/**
- * A tokenizer. Can produce arrays of tokens from a given type.
- *
- * @param <T> given type
- */
-public interface Tokenizer<T> {
-
-    /**
-     * Returns an array of tokens.
-     *
-     * @param text input text
-     * @return array of tokens
-     */
-    T[] tokenize(CharSequence text);
-
-}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/internal/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/internal/package-info.java b/src/main/java/org/apache/commons/text/similarity/internal/package-info.java
deleted file mode 100644
index 548e2b7..0000000
--- a/src/main/java/org/apache/commons/text/similarity/internal/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Classes used internally by similarity algorithms. Internal use only, backward compatibility
- * not guaranteed.
- *
- * @since 0.1
- */
-package org.apache.commons.text.similarity.internal;

http://git-wip-us.apache.org/repos/asf/commons-text/blob/df681238/src/main/java/org/apache/commons/text/similarity/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/similarity/package-info.java b/src/main/java/org/apache/commons/text/similarity/package-info.java
index bd1e400..d2a19a6 100644
--- a/src/main/java/org/apache/commons/text/similarity/package-info.java
+++ b/src/main/java/org/apache/commons/text/similarity/package-info.java
@@ -33,7 +33,7 @@
  * </ul>
  *
  * <p>The {@link org.apache.commons.text.similarity.CosineDistance Cosine Distance}
- * utilises a {@link org.apache.commons.text.similarity.internal.RegexTokenizer regular expression tokenizer (\w+)}.
+ * utilises a {@link org.apache.commons.text.similarity.RegexTokenizer regular expression tokenizer (\w+)}.
  * And the {@link org.apache.commons.text.similarity.LevenshteinDistance Levenshtein Distance}'s
  * behaviour can be changed to take into consideration a maximum throughput.</p>
  *


Mime
View raw message