devicemap-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From re...@apache.org
Subject svn commit: r1693352 - /devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java
Date Wed, 29 Jul 2015 21:47:32 GMT
Author: rezan
Date: Wed Jul 29 21:47:32 2015
New Revision: 1693352

URL: http://svn.apache.org/r1693352
Log:
changed tokenization from byte to char

Modified:
    devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java

Modified: devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java
URL: http://svn.apache.org/viewvc/devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java?rev=1693352&r1=1693351&r2=1693352&view=diff
==============================================================================
--- devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java (original)
+++ devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java Wed Jul 29 21:47:32 2015
@@ -33,7 +33,7 @@ public class DeviceMapClient {
   private String domainVersion;
 
   private List<Transformer> transformers;
-  private List<byte[]> tokenSeperators;
+  private List<String> tokenSeperators;
   private int ngramConcatSize;
 
   private String defaultId;
@@ -100,15 +100,17 @@ public class DeviceMapClient {
         }
         
         for(Iterator<JsonNode> i = inputParser.get("tokenSeperators").iterator(); i.hasNext();) {
-          JsonNode tokenSeperator = i.next();
+          JsonNode tokenSeperatorNode = i.next();
 
-          if(tokenSeperator.asText().isEmpty()) {
+          if(tokenSeperatorNode.asText().isEmpty()) {
             throw new Exception("Empty tokenSeperator not allowed");
           }
 
-          tokenSeperators.add(tokenSeperator.asText().getBytes());
+          String tokenSeperator = tokenSeperatorNode.asText();
 
-          Main.log("Found tokenSeperator: '" + tokenSeperator.asText() + "'", 2);
+          tokenSeperators.add(tokenSeperator);
+
+          Main.log("Found tokenSeperator: '" + tokenSeperator + "'", 2);
         }
       }
 
@@ -258,47 +260,47 @@ public class DeviceMapClient {
 
     Main.log("Transformed: '" + transformed + "'", 3);
 
-    //tokenization using bytes
+    //tokenization
     List<String> tokens = new ArrayList<>();
 
-    byte[] source = transformed.getBytes();
+    String source = transformed;
     int sourcePos = 0;
-
-    byte[] dest = new byte[source.length];
-    int destPos = 0;
+    int destStart = 0;
+    int destEnd = 0;
 
     source:
-    while(sourcePos < source.length) {
+    while(sourcePos < source.length()) {
       seperator:
-      for(byte[] seperator : tokenSeperators) {
+      for(String seperator : tokenSeperators) {
         int i;
 
-        for(i = 0; i < seperator.length; i++) {
-          if(source[sourcePos + i] != seperator[i]) {
+        for(i = 0; i < seperator.length(); i++) {
+          if(sourcePos + i >= source.length() || source.charAt(sourcePos + i) != seperator.charAt(i)) {
             continue seperator;
           }
         }
 
-        if(destPos > 0) {
-          tokens.add(new String(dest, 0, destPos));
-          destPos = 0;
+        if(destEnd - destStart > 0) {
+          tokens.add(source.substring(destStart, destEnd));
         }
-        
+
         sourcePos += i;
+        destStart = destEnd = sourcePos;
         
         continue source;
       }
       
-      dest[destPos++] = source[sourcePos++];
+      sourcePos++;
+      destEnd++;
     }
 
-    if(destPos > 0) {
-      tokens.add(new String(dest, 0, destPos));
+    if(destEnd - destStart > 0) {
+      tokens.add(source.substring(destStart, destEnd));
     }
 
     Main.log("Tokens: " + tokens, 3);
 
-    List<String> ngrams = new ArrayList<>();
+    List<String> ngramTokenStream = new ArrayList<>();
 
     for(int i = 0; i < tokens.size(); i++) {
       String ngram = "";
@@ -310,12 +312,12 @@ public class DeviceMapClient {
         ngramParts.add(0, ngram);
       }
 
-      ngrams.addAll(ngramParts);
+      ngramTokenStream.addAll(ngramParts);
 
       ngramParts.clear();
     }
 
-    Main.log("Ngrams: " + ngrams, 3);
+    Main.log("Ngrams: " + ngramTokenStream, 3);
     
     return "";
   }



Mime
View raw message