incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1402638 - in /incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils: kernel/ kernel/Kernel.java kernel/LinearKernel.java kernel/PolyKernel.java kernel/RBFKernel.java tree/ tree/FragmentUtils.java tree/SimpleTree.java
Date Fri, 26 Oct 2012 19:54:53 GMT
Author: chenpei
Date: Fri Oct 26 19:54:53 2012
New Revision: 1402638

URL: http://svn.apache.org/viewvc?rev=1402638&view=rev
Log:
CTAKES-62 moved the common utils functionality org.chboston.cnlp.ctakes.kernel.* and org.chboston.cnlp.ctakes.utils.*
out of the type system project and into ctakes-utils

Added:
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/Kernel.java
  (with props)
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/LinearKernel.java
  (with props)
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/PolyKernel.java
  (with props)
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/RBFKernel.java
  (with props)
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/FragmentUtils.java
  (with props)
    incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/SimpleTree.java
  (with props)

Added: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/Kernel.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/Kernel.java?rev=1402638&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/Kernel.java (added)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/Kernel.java Fri Oct 26 19:54:53 2012
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.utils.kernel;
+
+import libsvm.svm_node;
+
+/**
+ * Base class for kernels: functions that map a pair of objects to a double.
+ */
+public abstract class Kernel{
+	/**
+	 * Computes the dot product of two sparse vectors by walking both arrays
+	 * in step and multiplying the values of entries that share an index.
+	 * The walk only pairs up every shared index when both arrays are ordered
+	 * by ascending index.
+	 *
+	 * @param v1 first sparse vector
+	 * @param v2 second sparse vector
+	 * @return the sum of value products over the indices found in both arrays
+	 */
+	protected static double dotProd(svm_node[] v1, svm_node[] v2){
+		double sim = 0.0;
+		int i = 0;
+		int j = 0;
+		
+		while(i < v1.length && j < v2.length){
+			if(v1[i].index == v2[j].index){
+				sim += (v1[i].value * v2[j].value);
+				i++;
+				j++;
+			}else if(v1[i].index < v2[j].index){
+				// v1 is behind: skip its entry, it has no partner in v2
+				i++;
+			}else{
+				// neither equal nor smaller, so v2 is the one that is behind
+				j++;
+			}
+		}
+		return sim;
+	}
+	
+	/**
+	 * Evaluates the kernel on a pair of objects.
+	 *
+	 * @param o1 first object
+	 * @param o2 second object
+	 * @return the kernel value for the pair
+	 */
+	public abstract double eval(Object o1, Object o2);
+
+}
+
+

Propchange: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/Kernel.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/LinearKernel.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/LinearKernel.java?rev=1402638&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/LinearKernel.java (added)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/LinearKernel.java Fri Oct 26 19:54:53 2012
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.utils.kernel;
+
+/**
+ * A PolyKernel fixed to degree 1 and coefficient 0.0.
+ */
+public class LinearKernel extends PolyKernel{
+
+	/** Creates a linear kernel with the normalization flag set to false. */
+	public LinearKernel(){
+		super(1, 0.0, false);
+	}
+
+	/**
+	 * Creates a linear kernel.
+	 *
+	 * @param norm handed to the PolyKernel constructor as its normalization flag
+	 */
+	public LinearKernel(boolean norm){
+		super(1, 0.0, norm);
+	}
+}
+

Propchange: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/LinearKernel.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/PolyKernel.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/PolyKernel.java?rev=1402638&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/PolyKernel.java (added)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/PolyKernel.java Fri Oct 26 19:54:53 2012
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.utils.kernel;
+
+import java.util.concurrent.ConcurrentHashMap;
+
+//import opennlp.tools.parser.Parse;
+import libsvm.svm_node;
+
+/**
+ * Polynomial kernel over sparse vectors: (v1 . v2 + coef) raised to
+ * degree, optionally divided by the square root of the product of
+ * both arguments' self-similarities.
+ */
+public class PolyKernel extends Kernel{
+
+	private final int degree;
+	private final double coef;
+	private final boolean norm;
+	// Cache of sim(v, v), keyed by the object that was passed to eval.
+	private final ConcurrentHashMap<Object,Double> normalizers = new ConcurrentHashMap<Object,Double>();
+
+	/**
+	 * @param degree exponent applied to the shifted dot product
+	 * @param coef   constant added to the dot product before exponentiation
+	 * @param norm   whether eval divides by the arguments' self-similarities
+	 */
+	public PolyKernel(int degree, double coef, boolean norm){
+		this.degree = degree;
+		this.coef = coef;
+		this.norm = norm;
+	}
+
+	/** Creates an unnormalized kernel with degree 1 and coefficient 0.0. */
+	public PolyKernel(){
+		this(1, 0.0, false);
+	}
+
+	/**
+	 * Evaluates the kernel on two sparse vectors.
+	 *
+	 * @param o1 first vector; must be an svm_node[]
+	 * @param o2 second vector; must be an svm_node[]
+	 * @return the (optionally normalized) polynomial similarity
+	 * @throws ClassCastException if either argument is not an svm_node[]
+	 */
+	@Override
+	public double eval(Object o1, Object o2){
+		svm_node[] v1 = (svm_node[]) o1;
+		svm_node[] v2 = (svm_node[]) o2;
+		
+		if(!norm){
+			return sim(v1,v2);
+		}
+		double norm1 = selfSim(o1, v1);
+		double norm2 = selfSim(o2, v2);
+		// There is no guard for a zero product of self-similarities: the
+		// division then yields NaN or an infinity.
+		return sim(v1,v2) / Math.sqrt(norm1*norm2);
+	}
+
+	// Returns sim(v, v), computing and caching it under key on first use.
+	private double selfSim(Object key, svm_node[] v){
+		Double cached = normalizers.get(key);
+		if(cached == null){
+			cached = sim(v, v);
+			normalizers.put(key, cached);
+		}
+		return cached;
+	}
+
+	// Unnormalized polynomial similarity: (v1 . v2 + coef)^degree.
+	private double sim(svm_node[] v1, svm_node[] v2){
+		double sim = dotProd(v1, v2) + coef;
+		if(degree != 1){
+			sim = Math.pow(sim, degree);
+		}
+		return sim; 
+	}
+}

Propchange: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/PolyKernel.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/RBFKernel.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/RBFKernel.java?rev=1402638&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/RBFKernel.java (added)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/RBFKernel.java Fri Oct 26 19:54:53 2012
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.utils.kernel;
+
+import libsvm.svm_node;
+
+/**
+ * Radial basis function kernel over sparse vectors:
+ * exp(-gamma * d), where d is the squared Euclidean distance between the
+ * two vectors.
+ */
+public class RBFKernel extends Kernel {
+
+	double gamma = 0.0;
+
+	/**
+	 * @param gamma multiplier applied to the squared distance inside the exponent
+	 */
+	public RBFKernel(double gamma){
+		this.gamma = gamma;
+	}
+
+	/**
+	 * Evaluates the kernel on two sparse vectors. An index that appears in
+	 * only one of the arrays contributes the square of its own value, i.e.
+	 * the missing entry counts as zero. The merge walk assumes both arrays
+	 * are ordered by ascending index.
+	 *
+	 * @param o1 first vector; must be an svm_node[]
+	 * @param o2 second vector; must be an svm_node[]
+	 * @return exp(-gamma * squared distance between the vectors)
+	 * @throws ClassCastException if either argument is not an svm_node[]
+	 */
+	@Override
+	public double eval(Object o1, Object o2){
+		svm_node[] v1 = (svm_node[]) o1;
+		svm_node[] v2 = (svm_node[]) o2;
+
+		double squareNorm = 0;
+		int i=0;
+		int j=0;
+		
+		while(i < v1.length || j < v2.length){
+			if(i < v1.length && j < v2.length && v1[i].index == v2[j].index){
+				// index present in both vectors
+				squareNorm += Math.pow(v1[i].value - v2[j].value, 2);
+				i++;
+				j++;
+			}else if(i < v1.length && (j == v2.length || v1[i].index < v2[j].index)){
+				// index present only in v1 (v2 is exhausted or already past it)
+				squareNorm += Math.pow(v1[i].value, 2);
+				i++;
+			}else{
+				// index present only in v2
+				squareNorm += Math.pow(v2[j].value, 2);
+				j++;
+			}
+		}
+
+		return Math.exp(-gamma * squareNorm);
+	}
+}

Propchange: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/kernel/RBFKernel.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/FragmentUtils.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/FragmentUtils.java?rev=1402638&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/FragmentUtils.java (added)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/FragmentUtils.java Fri Oct 26 19:54:53 2012
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.utils.tree;
+
+/**
+ * Helpers for turning tree fragments in string form into SimpleTree objects.
+ */
+public class FragmentUtils {
+
+	/**
+	 * Parses a fragment written with parentheses only and no separators
+	 * between nodes, such as (S(NP(DT))(VP)), into a tree. The text between
+	 * an opening parenthesis and the next parenthesis of either kind becomes
+	 * the node's category.
+	 *
+	 * @param frag the fragment string, starting with an opening parenthesis
+	 * @return the root node of the parsed tree
+	 * @throws IllegalArgumentException if a character other than a parenthesis
+	 *         appears where the parser expects one (previously this made the
+	 *         loop spin forever)
+	 */
+	public static SimpleTree frag2tree(String frag){
+		char[] chars = frag.toCharArray();
+		int ind = frag.indexOf('(', 1);
+		String type = frag.substring(1, ind);
+		SimpleTree root = new SimpleTree(type);
+		SimpleTree cur = root;
+		while(ind < chars.length){
+			if(chars[ind] == '('){
+				int lpar = frag.indexOf('(', ind+1);
+				int rpar = frag.indexOf(')', ind+1);
+				int oldind = ind;
+				// the category ends at whichever parenthesis comes first
+				ind = (lpar < rpar  && lpar != -1 ? lpar : rpar);
+				type = frag.substring(oldind+1, ind);
+				SimpleTree nt = new SimpleTree(type, cur);
+				cur.addChild(nt);
+				cur = nt;
+			}else if(chars[ind] == ')'){
+				// if close paren, go up a level and move to next index,
+				// which is expected to be another paren
+				cur = cur.parent;
+				ind++;
+			}else{
+				// ind would never advance from here, so fail instead of looping
+				throw new IllegalArgumentException(
+						"Expected '(' or ')' at index " + ind + " of fragment: " + frag);
+			}
+		}
+		return root;
+	}	
+}

Propchange: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/FragmentUtils.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/SimpleTree.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/SimpleTree.java?rev=1402638&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/SimpleTree.java (added)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/SimpleTree.java Fri Oct 26 19:54:53 2012
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.utils.tree;
+
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Minimal tree node with a category label, an ordered list of children and
+ * a parent link, readable from and printable as a bracketed string such as
+ * (S (NP (PRP it)) (VP (VBZ is) (JJ red))).
+ */
+public class SimpleTree {
+	/** Label of this node; for a leaf this is the token text. */
+	public String cat;
+	/** Children in left-to-right order; empty for a leaf. */
+	public ArrayList<SimpleTree> children;
+	/** Parent node, or null if none has been assigned. */
+	public SimpleTree parent = null;
+	// Matches a whole pre-terminal such as "(PRP it)": group 1 is the label, group 2 the token.
+	static Pattern ptPatt = Pattern.compile("\\(([^ (]+) +([^ )]+)\\)");
+	
+	/** Creates a node with the given label and no parent. */
+	public SimpleTree(String c){
+		this(c,null);
+	}
+	
+	/**
+	 * Creates a node with the given label and parent link. The new node is
+	 * not added to the parent's child list.
+	 */
+	public SimpleTree(String c, SimpleTree p){
+		cat = c;
+		parent = p;
+		children = new ArrayList<SimpleTree>();
+	}
+	
+	/** Appends a child; the child's parent field is left untouched. */
+	public void addChild(SimpleTree t){
+		children.add(t);
+	}
+	
+	/** Inserts a child at the given position; the child's parent field is left untouched. */
+	public void insertChild(int index, SimpleTree t){
+		children.add(index, t);
+	}
+	
+	/** Removes and returns the child at the given position. */
+	public SimpleTree removeChild(int index){
+		return children.remove(index);
+	}
+	
+	/**
+	 * Renders the subtree in bracketed form. A node whose only child is a
+	 * leaf prints as "(cat token)"; otherwise the children are printed
+	 * recursively, separated by single spaces.
+	 */
+	@Override
+	public String toString(){
+		StringBuilder out = new StringBuilder("(").append(cat).append(" ");
+		boolean preTerminal = children.size() == 1 && children.get(0).children.isEmpty();
+		if(preTerminal){
+			out.append(children.get(0).cat);
+		}else{
+			String sep = "";
+			for(SimpleTree kid : children){
+				out.append(sep).append(kid.toString());
+				sep = " ";
+			}
+		}
+		return out.append(")").toString();
+	}
+	
+	/**
+	 * Builds a tree from its bracketed form.
+	 *
+	 * @param string a bracketed tree such as "(NP (PRP it))"
+	 * @return the root of the parsed tree, with parent links set on all descendants
+	 */
+	public static SimpleTree fromString(String string){
+		// base case: a pre-terminal directly wrapping a token
+		Matcher preTerm = ptPatt.matcher(string);
+		if(preTerm.matches()){
+			SimpleTree node = new SimpleTree(preTerm.group(1));
+			SimpleTree leaf = new SimpleTree(preTerm.group(2), node);
+			node.addChild(leaf);
+			return node;
+		}
+		
+		// recursive case: label up to the first space, then bracketed children
+		int labelEnd = string.indexOf(' ');
+		SimpleTree node = new SimpleTree(string.substring(1, labelEnd));
+		String body = string.substring(labelEnd+1, string.length()-1);
+		for(String childText : splitChildren(body)){
+			SimpleTree child = fromString(childText);
+			child.parent = node;
+			node.addChild(child);
+		}
+		return node;
+	}
+	
+	// Cuts s into its top-level parenthesized groups; stops at an unmatched ')'.
+	private static String[] splitChildren(String s){
+		ArrayList<String> pieces = new ArrayList<String>();
+		int depth = 0;
+		int open = 0;
+		for(int pos = 0; pos < s.length(); pos++){
+			char c = s.charAt(pos);
+			if(c == '('){
+				depth++;
+				if(depth == 1){
+					open = pos;
+				}
+			}else if(c == ')'){
+				depth--;
+				if(depth < 0){
+					break;
+				}
+				if(depth == 0){
+					pieces.add(s.substring(open, pos+1));
+				}
+			}
+		}
+		return pieces.toArray(new String[pieces.size()]);
+	}
+	
+	/** Small demo: prints one hand-built tree and one parsed tree. */
+	public static void main(String[] args){
+		SimpleTree top = new SimpleTree("TOP");
+		SimpleTree s = new SimpleTree("S");
+		SimpleTree np = new SimpleTree("NP");
+		SimpleTree vp = new SimpleTree("VP");
+		top.addChild(s);
+		s.addChild(np);
+		s.addChild(vp);
+		np.addChild(new SimpleTree("i"));
+		vp.addChild(new SimpleTree("ran"));
+		System.out.println(top.toString());
+		
+		SimpleTree parsed = SimpleTree.fromString("(S (NP (PRP it)) (VP (VBZ is) (JJ red)))");
+		System.out.println(parsed.toString());
+	}
+}

Propchange: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/tree/SimpleTree.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message