devicemap-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From re...@apache.org
Subject svn commit: r1693327 - in /devicemap/trunk/clients/2.0/reference/src: DeviceMapClient.java Main.java
Date Wed, 29 Jul 2015 18:04:04 GMT
Author: rezan
Date: Wed Jul 29 18:04:04 2015
New Revision: 1693327

URL: http://svn.apache.org/r1693327
Log:
tokenization

Modified:
    devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java
    devicemap/trunk/clients/2.0/reference/src/Main.java

Modified: devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java
URL: http://svn.apache.org/viewvc/devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java?rev=1693327&r1=1693326&r2=1693327&view=diff
==============================================================================
--- devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java (original)
+++ devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java Wed Jul 29 18:04:04 2015
@@ -33,7 +33,7 @@ public class DeviceMapClient {
   private String domainVersion;
 
   private List<Transformer> transformers;
-  private List<String> tokenSeperators;
+  private List<byte[]> tokenSeperators;
   private int ngramConcatSize;
 
   private String defaultId;
@@ -43,7 +43,7 @@ public class DeviceMapClient {
     domainVersion = null;
 
     transformers = new ArrayList<Transformer>();
-    tokenSeperators = new ArrayList<String>();
+    tokenSeperators = new ArrayList<byte[]>();
     ngramConcatSize = 1;
 
     defaultId = null;
@@ -92,13 +92,17 @@ public class DeviceMapClient {
 
       if(get(inputParser, "tokenSeperators").isArray()) {
         if(patch) {
-          tokenSeperators = new ArrayList<String>();
+          tokenSeperators = new ArrayList<byte[]>();
         }
         
         for(Iterator<JsonNode> i = inputParser.get("tokenSeperators").iterator(); i.hasNext();) {
           JsonNode tokenSeperator = i.next();
 
-          tokenSeperators.add(tokenSeperator.asText());
+          if(tokenSeperator.asText().isEmpty()) {
+            throw new Exception("Empty tokenSeperator not allowed");
+          }
+
+          tokenSeperators.add(tokenSeperator.asText().getBytes());
 
           Main.log("  Found tokenSeperator: '" + tokenSeperator.asText() + "'");
         }
@@ -237,6 +241,46 @@ public class DeviceMapClient {
     }
 
     Main.log("  Transformed: '" + transformed + "'");
+
+    //tokenization using bytes
+    List<String> tokens = new ArrayList<String>();
+
+    byte[] source = transformed.getBytes();
+    int sourcePos = 0;
+
+    byte[] dest = new byte[source.length];
+    int destPos = 0;
+
+    source:
+    while(sourcePos < source.length) {
+      seperator:
+      for(byte[] seperator : tokenSeperators) {
+        int i;
+
+        for(i = 0; i < seperator.length; i++) {
+          if(source[sourcePos + i] != seperator[i]) {
+            continue seperator;
+          }
+        }
+
+        if(destPos > 0) {
+          tokens.add(new String(dest, 0, destPos));
+          destPos = 0;
+        }
+        
+        sourcePos += i;
+        
+        continue source;
+      }
+      
+      dest[destPos++] = source[sourcePos++];
+    }
+
+    if(destPos > 0) {
+      tokens.add(new String(dest, 0, destPos));
+    }
+
+    Main.log("  Tokens: " + tokens);
     
     return "";
   }

Modified: devicemap/trunk/clients/2.0/reference/src/Main.java
URL: http://svn.apache.org/viewvc/devicemap/trunk/clients/2.0/reference/src/Main.java?rev=1693327&r1=1693326&r2=1693327&view=diff
==============================================================================
--- devicemap/trunk/clients/2.0/reference/src/Main.java (original)
+++ devicemap/trunk/clients/2.0/reference/src/Main.java Wed Jul 29 18:04:04 2015
@@ -83,6 +83,7 @@ public class Main {
 
     if(testString != null) {
       Main.log("Test string: '" + testString + "'");
+      String result = client.classify(testString);
     }
 
     if(failure) {



Mime
View raw message