commons-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From brit...@apache.org
Subject svn commit: r949214 [35/35] - in /websites/production/commons/content/sandbox/commons-text: ./ apidocs/ apidocs/org/apache/commons/text/diff/ apidocs/org/apache/commons/text/diff/class-use/ apidocs/org/apache/commons/text/names/ apidocs/org/apache/comm...
Date Sun, 26 Apr 2015 10:18:28 GMT
Added: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/RegexTokenizer.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/RegexTokenizer.html
(added)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/RegexTokenizer.html
Sun Apr 26 10:18:25 2015
@@ -0,0 +1,63 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head><meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+<title>RegexTokenizer xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../../apidocs/org/apache/commons/text/similarity/RegexTokenizer.html">View
Javadoc</a></div><pre>
+<a class="jxr_linenumber" name="L1" href="#L1">1</a>   <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L2" href="#L2">2</a>   <em class="jxr_comment">
* Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a class="jxr_linenumber" name="L3" href="#L3">3</a>   <em class="jxr_comment">
* contributor license agreements.  See the NOTICE file distributed with</em>
+<a class="jxr_linenumber" name="L4" href="#L4">4</a>   <em class="jxr_comment">
* this work for additional information regarding copyright ownership.</em>
+<a class="jxr_linenumber" name="L5" href="#L5">5</a>   <em class="jxr_comment">
* The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a class="jxr_linenumber" name="L6" href="#L6">6</a>   <em class="jxr_comment">
* (the "License"); you may not use this file except in compliance with</em>
+<a class="jxr_linenumber" name="L7" href="#L7">7</a>   <em class="jxr_comment">
* the License.  You may obtain a copy of the License at</em>
+<a class="jxr_linenumber" name="L8" href="#L8">8</a>   <em class="jxr_comment">
*</em>
+<a class="jxr_linenumber" name="L9" href="#L9">9</a>   <em class="jxr_comment">
*      <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a class="jxr_linenumber" name="L10" href="#L10">10</a>  <em class="jxr_comment">
*</em>
+<a class="jxr_linenumber" name="L11" href="#L11">11</a>  <em class="jxr_comment">
* Unless required by applicable law or agreed to in writing, software</em>
+<a class="jxr_linenumber" name="L12" href="#L12">12</a>  <em class="jxr_comment">
* distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a class="jxr_linenumber" name="L13" href="#L13">13</a>  <em class="jxr_comment">
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a class="jxr_linenumber" name="L14" href="#L14">14</a>  <em class="jxr_comment">
* See the License for the specific language governing permissions and</em>
+<a class="jxr_linenumber" name="L15" href="#L15">15</a>  <em class="jxr_comment">
* limitations under the License.</em>
+<a class="jxr_linenumber" name="L16" href="#L16">16</a>  <em class="jxr_comment">
*/</em>
+<a class="jxr_linenumber" name="L17" href="#L17">17</a>  <strong class="jxr_keyword">package</strong>
org.apache.commons.text.similarity;
+<a class="jxr_linenumber" name="L18" href="#L18">18</a>  
+<a class="jxr_linenumber" name="L19" href="#L19">19</a>  <strong class="jxr_keyword">import</strong>
java.util.ArrayList;
+<a class="jxr_linenumber" name="L20" href="#L20">20</a>  <strong class="jxr_keyword">import</strong>
java.util.List;
+<a class="jxr_linenumber" name="L21" href="#L21">21</a>  <strong class="jxr_keyword">import</strong>
java.util.regex.Matcher;
+<a class="jxr_linenumber" name="L22" href="#L22">22</a>  <strong class="jxr_keyword">import</strong>
java.util.regex.Pattern;
+<a class="jxr_linenumber" name="L23" href="#L23">23</a>  
+<a class="jxr_linenumber" name="L24" href="#L24">24</a>  <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L25" href="#L25">25</a>  <em class="jxr_javadoccomment">
* A simple word tokenizer that utilizes regex to find words. It applies a regex</em>
+<a class="jxr_linenumber" name="L26" href="#L26">26</a>  <em class="jxr_javadoccomment">
* {@code (\w)+} over the input text to extract words from a given character</em>
+<a class="jxr_linenumber" name="L27" href="#L27">27</a>  <em class="jxr_javadoccomment">
* sequence.</em>
+<a class="jxr_linenumber" name="L28" href="#L28">28</a>  <em class="jxr_javadoccomment">
*/</em>
+<a class="jxr_linenumber" name="L29" href="#L29">29</a>  <strong class="jxr_keyword">class</strong>
<a href="../../../../../org/apache/commons/text/similarity/RegexTokenizer.html">RegexTokenizer</a>
<strong class="jxr_keyword">implements</strong> Tokenizer&lt;CharSequence&gt;
{
+<a class="jxr_linenumber" name="L30" href="#L30">30</a>  
+<a class="jxr_linenumber" name="L31" href="#L31">31</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L32" href="#L32">32</a>  <em class="jxr_javadoccomment">
    * {@inheritDoc}</em>
+<a class="jxr_linenumber" name="L33" href="#L33">33</a>  <em class="jxr_javadoccomment">
    *</em>
+<a class="jxr_linenumber" name="L34" href="#L34">34</a>  <em class="jxr_javadoccomment">
    * @throws IllegalArgumentException if the input text is blank</em>
+<a class="jxr_linenumber" name="L35" href="#L35">35</a>  <em class="jxr_javadoccomment">
    */</em>
+<a class="jxr_linenumber" name="L36" href="#L36">36</a>      @Override
+<a class="jxr_linenumber" name="L37" href="#L37">37</a>      <strong class="jxr_keyword">public</strong>
CharSequence[] tokenize(CharSequence text) {
+<a class="jxr_linenumber" name="L38" href="#L38">38</a>          <strong class="jxr_keyword">if</strong>
(text == <strong class="jxr_keyword">null</strong> || text.toString().trim().equals(<span
class="jxr_string">""</span>)) {
+<a class="jxr_linenumber" name="L39" href="#L39">39</a>              <strong
class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong>
IllegalArgumentException(<span class="jxr_string">"Invalid text"</span>);
+<a class="jxr_linenumber" name="L40" href="#L40">40</a>          }
+<a class="jxr_linenumber" name="L41" href="#L41">41</a>          Pattern pattern
= Pattern.compile(<span class="jxr_string">"(&#92;&#92;w)+"</span>);
+<a class="jxr_linenumber" name="L42" href="#L42">42</a>          Matcher matcher
= pattern.matcher(text.toString());
+<a class="jxr_linenumber" name="L43" href="#L43">43</a>          List&lt;String&gt;
tokens = <strong class="jxr_keyword">new</strong> ArrayList&lt;String&gt;();
+<a class="jxr_linenumber" name="L44" href="#L44">44</a>          <strong class="jxr_keyword">while</strong>
(matcher.find()) {
+<a class="jxr_linenumber" name="L45" href="#L45">45</a>              tokens.add(matcher.group(0));
+<a class="jxr_linenumber" name="L46" href="#L46">46</a>          }
+<a class="jxr_linenumber" name="L47" href="#L47">47</a>          <strong class="jxr_keyword">return</strong>
tokens.toArray(<strong class="jxr_keyword">new</strong> String[0]);
+<a class="jxr_linenumber" name="L48" href="#L48">48</a>      }
+<a class="jxr_linenumber" name="L49" href="#L49">49</a>  
+<a class="jxr_linenumber" name="L50" href="#L50">50</a>  }
+</pre>
+<hr/>
+<div id="footer">Copyright &#169; 2014&#x2013;2015 <a href="http://www.apache.org/">The
Apache Software Foundation</a>. All rights reserved.</div>
+</body>
+</html>
\ No newline at end of file

Propchange: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/RegexTokenizer.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/Tokenizer.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/Tokenizer.html
(added)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/Tokenizer.html
Sun Apr 26 10:18:25 2015
@@ -0,0 +1,47 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head><meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+<title>Tokenizer xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../../apidocs/org/apache/commons/text/similarity/Tokenizer.html">View
Javadoc</a></div><pre>
+<a class="jxr_linenumber" name="L1" href="#L1">1</a>   <em class="jxr_comment">/*</em>
+<a class="jxr_linenumber" name="L2" href="#L2">2</a>   <em class="jxr_comment">
* Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a class="jxr_linenumber" name="L3" href="#L3">3</a>   <em class="jxr_comment">
* contributor license agreements.  See the NOTICE file distributed with</em>
+<a class="jxr_linenumber" name="L4" href="#L4">4</a>   <em class="jxr_comment">
* this work for additional information regarding copyright ownership.</em>
+<a class="jxr_linenumber" name="L5" href="#L5">5</a>   <em class="jxr_comment">
* The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a class="jxr_linenumber" name="L6" href="#L6">6</a>   <em class="jxr_comment">
* (the "License"); you may not use this file except in compliance with</em>
+<a class="jxr_linenumber" name="L7" href="#L7">7</a>   <em class="jxr_comment">
* the License.  You may obtain a copy of the License at</em>
+<a class="jxr_linenumber" name="L8" href="#L8">8</a>   <em class="jxr_comment">
*</em>
+<a class="jxr_linenumber" name="L9" href="#L9">9</a>   <em class="jxr_comment">
*      <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a class="jxr_linenumber" name="L10" href="#L10">10</a>  <em class="jxr_comment">
*</em>
+<a class="jxr_linenumber" name="L11" href="#L11">11</a>  <em class="jxr_comment">
* Unless required by applicable law or agreed to in writing, software</em>
+<a class="jxr_linenumber" name="L12" href="#L12">12</a>  <em class="jxr_comment">
* distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a class="jxr_linenumber" name="L13" href="#L13">13</a>  <em class="jxr_comment">
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a class="jxr_linenumber" name="L14" href="#L14">14</a>  <em class="jxr_comment">
* See the License for the specific language governing permissions and</em>
+<a class="jxr_linenumber" name="L15" href="#L15">15</a>  <em class="jxr_comment">
* limitations under the License.</em>
+<a class="jxr_linenumber" name="L16" href="#L16">16</a>  <em class="jxr_comment">
*/</em>
+<a class="jxr_linenumber" name="L17" href="#L17">17</a>  <strong class="jxr_keyword">package</strong>
org.apache.commons.text.similarity;
+<a class="jxr_linenumber" name="L18" href="#L18">18</a>  
+<a class="jxr_linenumber" name="L19" href="#L19">19</a>  <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L20" href="#L20">20</a>  <em class="jxr_javadoccomment">
* A tokenizer. Can produce arrays of tokens from a given type.</em>
+<a class="jxr_linenumber" name="L21" href="#L21">21</a>  <em class="jxr_javadoccomment">
*</em>
+<a class="jxr_linenumber" name="L22" href="#L22">22</a>  <em class="jxr_javadoccomment">
* @param &lt;T&gt; given type</em>
+<a class="jxr_linenumber" name="L23" href="#L23">23</a>  <em class="jxr_javadoccomment">
*/</em>
+<a class="jxr_linenumber" name="L24" href="#L24">24</a>  <strong class="jxr_keyword">interface</strong>
Tokenizer&lt;T&gt; {
+<a class="jxr_linenumber" name="L25" href="#L25">25</a>  
+<a class="jxr_linenumber" name="L26" href="#L26">26</a>      <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="L27" href="#L27">27</a>  <em class="jxr_javadoccomment">
    * Returns an array of tokens.</em>
+<a class="jxr_linenumber" name="L28" href="#L28">28</a>  <em class="jxr_javadoccomment">
    *</em>
+<a class="jxr_linenumber" name="L29" href="#L29">29</a>  <em class="jxr_javadoccomment">
    * @param text input text</em>
+<a class="jxr_linenumber" name="L30" href="#L30">30</a>  <em class="jxr_javadoccomment">
    * @return array of tokens</em>
+<a class="jxr_linenumber" name="L31" href="#L31">31</a>  <em class="jxr_javadoccomment">
    */</em>
+<a class="jxr_linenumber" name="L32" href="#L32">32</a>      T[] tokenize(CharSequence
text);
+<a class="jxr_linenumber" name="L33" href="#L33">33</a>  
+<a class="jxr_linenumber" name="L34" href="#L34">34</a>  }
+</pre>
+<hr/>
+<div id="footer">Copyright &#169; 2014&#x2013;2015 <a href="http://www.apache.org/">The
Apache Software Foundation</a>. All rights reserved.</div>
+</body>
+</html>
\ No newline at end of file

Propchange: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/Tokenizer.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/package-frame.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/package-frame.html
(original)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/package-frame.html
Sun Apr 26 10:18:25 2015
@@ -22,6 +22,9 @@
             	<a href="CosineSimilarity.html" target="classFrame">CosineSimilarity</a>
           	</li>
           	          	<li>
+            	<a href="Counter.html" target="classFrame">Counter</a>
+          	</li>
+          	          	<li>
             	<a href="EditDistance.html" target="classFrame">EditDistance</a>
           	</li>
           	          	<li>
@@ -39,6 +42,12 @@
           	          	<li>
             	<a href="LevenshteinDistance.html" target="classFrame">LevenshteinDistance</a>
           	</li>
+          	          	<li>
+            	<a href="RegexTokenizer.html" target="classFrame">RegexTokenizer</a>
+          	</li>
+          	          	<li>
+            	<a href="Tokenizer.html" target="classFrame">Tokenizer</a>
+          	</li>
           	      	</ul>
 
 	</body>

Modified: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/package-summary.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/package-summary.html
(original)
+++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/package-summary.html
Sun Apr 26 10:18:25 2015
@@ -47,6 +47,11 @@
             	</tr>
 				            	<tr>
               		<td>
+                		<a href="Counter.html" target="classFrame">Counter</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
                 		<a href="EditDistance.html" target="classFrame">EditDistance</a>
               		</td>
             	</tr>
@@ -75,6 +80,16 @@
                 		<a href="LevenshteinDistance.html" target="classFrame">LevenshteinDistance</a>
               		</td>
             	</tr>
+				            	<tr>
+              		<td>
+                		<a href="RegexTokenizer.html" target="classFrame">RegexTokenizer</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Tokenizer.html" target="classFrame">Tokenizer</a>
+              		</td>
+            	</tr>
 				        	</tbody>
       	</table>
 		

Modified: websites/production/commons/content/sandbox/commons-text/xref/overview-frame.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/overview-frame.html (original)
+++ websites/production/commons/content/sandbox/commons-text/xref/overview-frame.html Sun
Apr 26 10:18:25 2015
@@ -24,11 +24,7 @@
 						<li>
 				<a href="org/apache/commons/text/similarity/package-frame.html" target="packageFrame">org.apache.commons.text.similarity</a>
 			</li>
-						<li>
-				<a href="org/apache/commons/text/similarity/internal/package-frame.html" target="packageFrame">org.apache.commons.text.similarity.internal</a>
-			</li>
 					</ul>
 
 	</body>
 </html>
-

Modified: websites/production/commons/content/sandbox/commons-text/xref/overview-summary.html
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/overview-summary.html (original)
+++ websites/production/commons/content/sandbox/commons-text/xref/overview-summary.html Sun
Apr 26 10:18:25 2015
@@ -48,11 +48,6 @@
                 		<a href="org/apache/commons/text/similarity/package-summary.html">org.apache.commons.text.similarity</a>
               		</td>
             	</tr>
-	          	            	<tr>
-              		<td>
-                		<a href="org/apache/commons/text/similarity/internal/package-summary.html">org.apache.commons.text.similarity.internal</a>
-              		</td>
-            	</tr>
 	          	        	</tbody>
       	</table>
 		

Modified: websites/production/commons/content/sandbox/commons-text/xref/stylesheet.css
==============================================================================
--- websites/production/commons/content/sandbox/commons-text/xref/stylesheet.css (original)
+++ websites/production/commons/content/sandbox/commons-text/xref/stylesheet.css Sun Apr 26
10:18:25 2015
@@ -111,4 +111,4 @@ hr {
 .jxr_keyword
 {
     color: #000;
-}
+}
\ No newline at end of file



Mime
View raw message