ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vjapa...@apache.org
Subject svn commit: r1551254 [21/26] - in /ctakes/branches/ytex: ctakes-ytex-res/ ctakes-ytex-res/.settings/ ctakes-ytex-res/src/ ctakes-ytex-res/src/main/ ctakes-ytex-res/src/main/resources/ ctakes-ytex-res/src/main/resources/org/ ctakes-ytex-res/src/main/res...
Date Mon, 16 Dec 2013 16:30:40 GMT
Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,114 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import net.sf.ehcache.Cache;
+import net.sf.ehcache.CacheManager;
+import net.sf.ehcache.Element;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.ctakes.ytex.kernel.tree.Node;
+
+
+/**
+ * Return norm of delegate kernel: <code>k(x,y)/sqrt(k(x,x)*k(y,y))</code>.
+ * <p>
+ * The per-object norm <code>sqrt(k(x,x))</code> is remembered for future
+ * reference: if the object is a org.apache.ctakes.ytex.kernel.tree.Node, the
+ * norm is saved in the node; else if cacheNorm = true, the norm is saved in the
+ * cache named "normCache" that {@link #init()} looks up in the cache manager.
+ * If the delegate kernel is fast (e.g. it's using caching itself / trivial
+ * operation) caching the norm will slow things down.
+ * 
+ * @author vijay
+ * 
+ */
+public class NormKernel implements Kernel {
+	private static final Log log = LogFactory.getLog(NormKernel.class);
+
+	/** cache for norms of non-Node objects; assigned by {@link #init()} */
+	private Cache normCache;
+	private CacheManager cacheManager;
+	/** the kernel whose values are normalized */
+	private Kernel delegateKernel;
+	/** if true, norms of non-Node objects are kept in {@link #normCache} */
+	private boolean cacheNorm = true;
+
+	public boolean isCacheNorm() {
+		return cacheNorm;
+	}
+
+	public void setCacheNorm(boolean cacheNorm) {
+		this.cacheNorm = cacheNorm;
+	}
+
+	public NormKernel(Kernel delegateKernel) {
+		this.delegateKernel = delegateKernel;
+	}
+
+	public NormKernel() {
+		super();
+	}
+
+	public CacheManager getCacheManager() {
+		return cacheManager;
+	}
+
+	public void setCacheManager(CacheManager cacheManager) {
+		this.cacheManager = cacheManager;
+	}
+
+	public Kernel getDelegateKernel() {
+		return delegateKernel;
+	}
+
+	public void setDelegateKernel(Kernel delegateKernel) {
+		this.delegateKernel = delegateKernel;
+	}
+
+	/**
+	 * compute the norm <code>sqrt(k(o1,o1))</code>, reusing a previously
+	 * stored value when there is one.
+	 * 
+	 * @param o1
+	 *            object to compute the norm for, may be null
+	 * @return the norm; 0 if o1 is null
+	 */
+	public double getNorm(Object o1) {
+		if (o1 == null) {
+			// nothing to evaluate. Returning the null Double that the old code
+			// held at this point would fail when unboxed to double.
+			return 0d;
+		}
+		final boolean isNode = o1 instanceof Node;
+		Double norm = null;
+		if (isNode) {
+			// look in node if this is a node
+			norm = ((Node) o1).getNorm();
+		} else if (this.isCacheNorm()) {
+			// look in cache otherwise
+			Element cachedNorm = normCache.get(o1);
+			if (cachedNorm != null) {
+				norm = (Double) cachedNorm.getValue();
+			}
+		}
+		if (norm == null) {
+			// couldn't get stored norm - compute it and remember it. Storing
+			// only here avoids rewriting the node / cache on every lookup.
+			norm = Math.sqrt(delegateKernel.evaluate(o1, o1));
+			if (isNode) {
+				((Node) o1).setNorm(norm);
+			} else if (this.isCacheNorm()) {
+				normCache.put(new Element(o1, norm));
+			}
+		}
+		return norm;
+	}
+
+	/**
+	 * evaluate the normalized kernel.
+	 * 
+	 * @return <code>k(o1,o2)/(norm(o1)*norm(o2))</code>; 0 if either object
+	 *         is null or either norm is 0
+	 */
+	public double evaluate(Object o1, Object o2) {
+		double d = 0;
+		if (o1 != null && o2 != null) {
+			double norm1 = getNorm(o1);
+			double norm2 = getNorm(o2);
+			// a zero norm would mean division by zero - leave d at 0
+			if (norm1 != 0 && norm2 != 0)
+				d = delegateKernel.evaluate(o1, o2) / (norm1 * norm2);
+		}
+		if (log.isTraceEnabled()) {
+			log.trace("K<" + o1 + "," + o2 + "> = " + d);
+		}
+		return d;
+	}
+
+	/**
+	 * look up the cache named "normCache" in the cache manager. Must be called
+	 * before norms of non-Node objects are requested while cacheNorm is true,
+	 * because {@link #getNorm(Object)} dereferences the cache in that case.
+	 */
+	public void init() {
+		normCache = cacheManager.getCache("normCache");
+	}
+}
\ No newline at end of file

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,47 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * compute the product of delegate kernels
+ * 
+ * @author vijay
+ * 
+ */
+public class ProductKernel extends CacheKernel {
+	private static final Log log = LogFactory.getLog(ProductKernel.class);
+	/**
+	 * use array instead of list. when running thread dumps, see a lot of action
+	 * in list.size(). may be a fluke, but can't hurt
+	 */
+	Kernel[] delegateKernels;
+
+	/**
+	 * @return fixed-size list view of the delegate kernels, or null if none
+	 *         have been set (wrapping a null array would throw)
+	 */
+	public List<Kernel> getDelegateKernels() {
+		return delegateKernels == null ? null : Arrays.asList(delegateKernels);
+	}
+
+	/**
+	 * @param delegateKernels
+	 *            kernels to multiply; copied into an array. null clears the
+	 *            delegates.
+	 */
+	public void setDelegateKernels(List<Kernel> delegateKernels) {
+		this.delegateKernels = delegateKernels == null ? null
+				: delegateKernels.toArray(new Kernel[delegateKernels.size()]);
+	}
+
+	/**
+	 * multiply the delegate kernels' values for the pair, in array order.
+	 * 
+	 * @return the product; 1 if there are no delegates
+	 */
+	@Override
+	public double innerEvaluate(Object o1, Object o2) {
+		double d = 1;
+		for (Kernel k : delegateKernels) {
+			d *= k.evaluate(o1, o2);
+			// product can't recover from 0 - skip the remaining kernels
+			if (d == 0)
+				break;
+		}
+		if (log.isTraceEnabled()) {
+			log.trace(new StringBuilder("K<").append(o1).append(",").append(o2)
+					.append("> = ").append(d));
+		}
+		return d;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,139 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.ctakes.ytex.kernel.metric.ConceptPairSimilarity;
+import org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityService;
+import org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityService.SimilarityMetricEnum;
+import org.springframework.beans.factory.InitializingBean;
+
+
+public class SemanticSimKernel extends CacheKernel implements InitializingBean {
+	private static final Log log = LogFactory.getLog(LinKernel.class);
+	private Map<String, Double> conceptFilter = null;
+	private ConceptSimilarityService conceptSimilarityService;
+	private double cutoff = 0;
+	private String label = null;
+	private String metricNames;
+	private List<SimilarityMetricEnum> metrics;
+	private Integer rankCutoff = null;
+
+	@Override
+	public void afterPropertiesSet() throws Exception {
+		super.afterPropertiesSet();
+		this.initializeConceptFilter();
+	}
+
+	/**
+	 * override CacheKernel - don't bother caching evaluation if the concepts
+	 * are not in the conceptFilter, or if they are identical.
+	 */
+	@Override
+	public double evaluate(Object o1, Object o2) {
+		String c1 = (String) o1;
+		String c2 = (String) o2;
+		double d = 0;
+		if (c1 != null && c2 != null) {
+			if (c1.equals(c2)) {
+				d = 1d;
+			} else if (this.conceptFilter == null
+					|| (conceptFilter.containsKey((String) o1) && conceptFilter
+							.containsKey((String) o2))) {
+				d = super.evaluate(o1, o2);
+			}
+		}
+		return d;
+	}
+
+	public ConceptSimilarityService getConceptSimilarityService() {
+		return conceptSimilarityService;
+	}
+
+	public double getCutoff() {
+		return cutoff;
+	}
+
+	public String getLabel() {
+		return label;
+	}
+
+	public String getMetricNames() {
+		return metricNames;
+	}
+
+	public Integer getRankCutoff() {
+		return rankCutoff;
+	}
+
+	protected void initializeConceptFilter() {
+		if (rankCutoff != null) {
+			conceptFilter = new HashMap<String, Double>();
+			cutoff = conceptSimilarityService.loadConceptFilter(label,
+					rankCutoff, conceptFilter);
+			if (conceptFilter.isEmpty()) {
+				log.warn("no concepts that matched the threshold for supervised semantic similarity. label="
+						+ label + ", rankCutoff=" + rankCutoff);
+			}
+		}
+	}
+
+	/**
+	 * return the product of all the similarity metrics
+	 */
+	@Override
+	public double innerEvaluate(Object o1, Object o2) {
+		double d = 0;
+		String c1 = (String) o1;
+		String c2 = (String) o2;
+		if (c1 != null && c2 != null) {
+			if (c1.equals(c2)) {
+				d = 1;
+			} else {
+				d = 1;
+				ConceptPairSimilarity csim = conceptSimilarityService
+						.similarity(metrics, c1, c2, conceptFilter, false);
+				for (Double simVal : csim.getSimilarities()) {
+					d *= simVal;
+				}
+			}
+		}
+		return d;
+	}
+
+	public void setConceptSimilarityService(
+			ConceptSimilarityService conceptSimilarityService) {
+		this.conceptSimilarityService = conceptSimilarityService;
+	}
+
+	public void setCutoff(double cutoff) {
+		this.cutoff = cutoff;
+	}
+
+	public void setLabel(String label) {
+		this.label = label;
+	}
+
+	public void setMetricNames(String metricNames) {
+		this.metricNames = metricNames;
+		this.metrics = new ArrayList<SimilarityMetricEnum>();
+		for(String metricName : metricNames.split(",")) {
+			SimilarityMetricEnum s = SimilarityMetricEnum.valueOf(metricName);
+			if(s == null) {
+				throw new RuntimeException("invalid metric name: " + metricName);
+			}
+			metrics.add(s);
+		}
+	}
+
+	public void setRankCutoff(Integer rankCutoff) {
+		this.rankCutoff = rankCutoff;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,392 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityService;
+
+
+/**
+ * Before comparing semantic distance, use this kernel to filter by semantic
+ * type.
+ * <p/>
+ * Modes:
+ * <li>MAINSUI (default): concept's main semantic types must overlap
+ * <li>TUI: concept's TUIs must overlap.
+ * <p/>
+ * The MAINSUI mode is taken from Sujeevan Aseervatham's semantic kernel. It
+ * maps all semantic types to a handful of semantic types.
+ * <p/>
+ * The corpusName parameter specifies the concepts for which cuis' semantic
+ * types will be loaded
+ * 
+ * @author vijay
+ * 
+ */
+public class SemanticTypeKernel extends CacheKernel {
+	private static final Log log = LogFactory.getLog(SemanticTypeKernel.class);
+	private static final String MAINSUI = "MAINSUI";
+	private static final String TUI = "TUI";
+
+	/**
+	 * map a semantic type number to its 'main' semantic type group.
+	 * 
+	 * @param sui
+	 *            numeric part of the semantic type id
+	 * @return group number 0-14, or -1 if the number is not in the table
+	 */
+	public static int getMainSem(int sui) {
+		switch (sui) {
+		case 52:
+		case 53:
+		case 56:
+		case 51:
+		case 64:
+		case 55:
+		case 66:
+		case 57:
+		case 54:
+			return 0;
+		case 17:
+		case 29:
+		case 23:
+		case 30:
+		case 31:
+		case 22:
+		case 25:
+		case 26:
+		case 18:
+		case 21:
+		case 24:
+			return 1;
+		case 116:
+		case 195:
+		case 123:
+		case 122:
+		case 118:
+		case 103:
+		case 120:
+		case 104:
+		case 200:
+		case 111:
+		case 196:
+		case 126:
+		case 131:
+		case 125:
+		case 129:
+		case 130:
+		case 197:
+		case 119:
+		case 124:
+		case 114:
+		case 109:
+		case 115:
+		case 121:
+		case 192:
+		case 110:
+		case 127:
+			return 2;
+		case 185:
+		case 77:
+		case 169:
+		case 102:
+		case 78:
+		case 170:
+		case 171:
+		case 80:
+		case 81:
+		case 89:
+		case 82:
+		case 79:
+			return 3;
+		case 203:
+		case 74:
+		case 75:
+			return 4;
+		case 20:
+		case 190:
+		case 49:
+		case 19:
+		case 47:
+		case 50:
+		case 33:
+		case 37:
+		case 48:
+		case 191:
+		case 46:
+		case 184:
+			return 5;
+		case 87:
+		case 88:
+		case 28:
+		case 85:
+		case 86:
+			return 6;
+		case 83:
+			return 7;
+		case 100:
+		case 3:
+		case 11:
+		case 8:
+		case 194:
+		case 7:
+		case 12:
+		case 99:
+		case 13:
+		case 4:
+		case 96:
+		case 16:
+		case 9:
+		case 15:
+		case 1:
+		case 101:
+		case 2:
+		case 98:
+		case 97:
+		case 14:
+		case 6:
+		case 10:
+		case 204: // vng missing sui
+		case 5:
+			return 8;
+		case 71:
+		case 168:
+		case 73:
+		case 72:
+		case 167:
+			return 9;
+		case 91:
+		case 90:
+			return 10;
+		case 93:
+		case 92:
+		case 94:
+		case 95:
+			return 11;
+		case 38:
+		case 69:
+		case 68:
+		case 34:
+		case 70:
+		case 67:
+			return 12;
+		case 43:
+		case 201:
+		case 45:
+		case 41:
+		case 44:
+		case 42:
+		case 32:
+		case 40:
+		case 39:
+			return 13;
+		case 60:
+		case 65:
+		case 58:
+		case 59:
+		case 63:
+		case 62:
+		case 61:
+			return 14;
+		default:
+			break;
+		}
+		return -1;
+	}
+
+	private ConceptSimilarityService conceptSimilarityService;
+	/** configuration property; not read by any code in this class */
+	private String corpusName;
+	/** cui -> set of main semantic type groups; filled by {@link #init()} */
+	private Map<String, Set<Integer>> cuiMainSuiMap = new HashMap<String, Set<Integer>>();
+	/** cui -> bit set of indices into {@link #tuiList}; obtained in {@link #init()} */
+	private Map<String, BitSet> cuiTuiMap = null;
+	/** tui strings, addressed by the bit indices of {@link #cuiTuiMap} */
+	private List<String> tuiList = null;
+	/** configuration property; not read by any code in this class */
+	private String cuiTuiQuery;
+
+	private String mode = MAINSUI;
+
+	/**
+	 * concepts have overlapping semantic types? yes return 1, else return 0
+	 * 
+	 * @throws RuntimeException
+	 *             if mode is neither empty, MAINSUI nor TUI
+	 */
+	@Override
+	public double innerEvaluate(Object o1, Object o2) {
+		if (o1 == null || o2 == null)
+			return 0;
+		if (o1.equals(o2))
+			return 1.0;
+		String currentMode = this.getMode();
+		// no mode configured means the default, MAINSUI
+		if (currentMode == null || currentMode.length() == 0
+				|| MAINSUI.equals(currentMode))
+			return mainSuiCheck(o1, o2);
+		if (TUI.equals(currentMode))
+			return tuiCheck(o1, o2);
+		log.error("invalid mode");
+		throw new RuntimeException("invalid mode");
+	}
+
+	public ConceptSimilarityService getConceptSimilarityService() {
+		return conceptSimilarityService;
+	}
+
+	public String getCorpusName() {
+		return corpusName;
+	}
+
+	public String getCuiTuiQuery() {
+		return cuiTuiQuery;
+	}
+
+	public String getMode() {
+		return mode;
+	}
+
+	/**
+	 * fetch the cui-tui map and tui list from the similarity service and
+	 * derive the cui -> 'main sui' map from them.
+	 */
+	public void init() {
+		cuiTuiMap = conceptSimilarityService.getCuiTuiMap();
+		tuiList = conceptSimilarityService.getTuiList();
+		initCuiMainSuiMap();
+	}
+
+	/**
+	 * init the cui -> 'main sui' map.
+	 */
+	private void initCuiMainSuiMap() {
+		if (cuiTuiMap != null) {
+			for (Map.Entry<String, BitSet> cuiTui : cuiTuiMap.entrySet()) {
+				cuiMainSuiMap.put(cuiTui.getKey(),
+						tuiToMainSui(cuiTui.getValue()));
+			}
+		}
+	}
+
+	/**
+	 * 
+	 * @param o1
+	 *            cui
+	 * @param o2
+	 *            cui
+	 * @return concepts have overlapping main semantic types, return 1, else
+	 *         return 0
+	 */
+	private double mainSuiCheck(Object o1, Object o2) {
+		Set<Integer> tuis1 = cuiMainSuiMap.get((String) o1);
+		Set<Integer> tuis2 = cuiMainSuiMap.get((String) o2);
+		// only compare the two if they have a common semantic type
+		if (tuis1 != null && tuis2 != null
+				&& !Collections.disjoint(tuis1, tuis2)) {
+			return 1;
+		} else {
+			return 0;
+		}
+	}
+
+	public void setConceptSimilarityService(
+			ConceptSimilarityService conceptSimilarityService) {
+		this.conceptSimilarityService = conceptSimilarityService;
+	}
+
+	public void setCorpusName(String corpusName) {
+		this.corpusName = corpusName;
+	}
+
+	public void setCuiTuiQuery(String cuiTuiQuery) {
+		this.cuiTuiQuery = cuiTuiQuery;
+	}
+
+	public void setMode(String mode) {
+		this.mode = mode;
+	}
+
+	/**
+	 * 
+	 * @param o1
+	 *            cui
+	 * @param o2
+	 *            cui
+	 * @return concepts have overlapping tuis, return 1, else return 0
+	 */
+	private double tuiCheck(Object o1, Object o2) {
+		if(cuiTuiMap == null)
+			return 0;
+		BitSet tuis1 = this.cuiTuiMap.get((String) o1);
+		BitSet tuis2 = this.cuiTuiMap.get((String) o2);
+		if (tuis1 != null && tuis2 != null && tuis1.intersects(tuis2)) {
+			return 1;
+		} else {
+			return 0;
+		}
+	}
+
+	/**
+	 * convert a bit set of tui indices into the set of main semantic type
+	 * groups. Assumes each tui string is one prefix character followed by
+	 * digits, since the first character is dropped before parsing.
+	 * <p/>
+	 * NOTE(review): tuis missing from the getMainSem table map to -1, so two
+	 * concepts that both carry an unmapped tui share -1 and count as
+	 * overlapping in mainSuiCheck - confirm this is intended.
+	 * 
+	 * @param tuis
+	 *            set bits are indices into the tui list
+	 * @return the main semantic type groups of the tuis
+	 */
+	public Set<Integer> tuiToMainSui(BitSet tuis) {
+		// size by the number of set bits; BitSet.size() is the allocated bit
+		// count (a multiple of 64) and would oversize the set of every cui
+		Set<Integer> mainSui = new HashSet<Integer>(
+				tuis.cardinality() * 2 + 1);
+		for (int i = tuis.nextSetBit(0); i >= 0; i = tuis.nextSetBit(i + 1)) {
+			String tui = this.tuiList.get(i);
+			mainSui.add(getMainSem(Integer.parseInt(tui.substring(1))));
+		}
+		return mainSui;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,30 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import java.util.List;
+
+/**
+ * sum of the delegate kernels: every delegate is evaluated on the pair of
+ * objects and the values are added up
+ */
+public class SumKernel extends CacheKernel {
+	List<Kernel> delegateKernels;
+
+	public List<Kernel> getDelegateKernels() {
+		return this.delegateKernels;
+	}
+
+	public void setDelegateKernels(List<Kernel> delegateKernels) {
+		this.delegateKernels = delegateKernels;
+	}
+
+	/**
+	 * add up the delegate kernels' values for the pair, in list order.
+	 * 
+	 * @return the sum; 0 if the list is empty
+	 */
+	@Override
+	public double innerEvaluate(Object o1, Object o2) {
+		double total = 0;
+		for (Kernel delegate : this.delegateKernels) {
+			double value = delegate.evaluate(o1, o2);
+			total += value;
+		}
+		return total;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,28 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import java.lang.reflect.Method;
+
+import org.apache.ctakes.ytex.kernel.OrderedPair;
+
+
+/**
+ * cache key for a method that takes 2 arguments, and is symmetric - the order
+ * of the arguments doesn't matter.
+ * 
+ * @author vijay
+ * 
+ */
+public class SymmetricPairCacheKeyGenerator implements CacheKeyGenerator {
+
+	@SuppressWarnings({ "unchecked", "rawtypes" })
+	@Override
+	public Object getCacheKey(Method method, Object[] args) {
+		return new OrderedPair((Comparable) args[0], (Comparable) args[1]);
+	}
+
+	@SuppressWarnings({ "rawtypes", "unchecked" })
+	public Object getCacheKey(Object o1, Object o2) {
+		return new OrderedPair((Comparable) o1, (Comparable) o2);
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,37 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.ctakes.ytex.kernel.tree.InstanceTreeBuilder;
+import org.apache.ctakes.ytex.kernel.tree.Node;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.access.ContextSingletonBeanFactoryLocator;
+
+
+/**
+ * Command line utility: loads instance trees with the "instanceTreeBuilder"
+ * bean of the kernel application context and prints every tree to stdout, one
+ * node per line, indented by depth.
+ */
+public class TreePrinter {
+
+	/**
+	 * @param args
+	 *            args[0] is handed to InstanceTreeBuilder.loadInstanceTrees
+	 */
+	public static void main(String args[]) throws IOException, ClassNotFoundException {
+		if (args == null || args.length < 1) {
+			// fail with a hint instead of an ArrayIndexOutOfBoundsException
+			System.err.println("usage: TreePrinter <argument for InstanceTreeBuilder.loadInstanceTrees>");
+			System.exit(1);
+			return;
+		}
+		String beanRefContext = "classpath*:org/apache/ctakes/ytex/kernelBeanRefContext.xml";
+		String contextName = "kernelApplicationContext";
+		ApplicationContext appCtx = (ApplicationContext) ContextSingletonBeanFactoryLocator
+				.getInstance(beanRefContext)
+				.useBeanFactory(contextName).getFactory();
+		InstanceTreeBuilder builder = appCtx.getBean(
+				"instanceTreeBuilder", InstanceTreeBuilder.class);
+		Map<Long, Node> instanceMap = builder.loadInstanceTrees(args[0]);
+		for (Node node : instanceMap.values()) {
+			printTree(node, 0);
+		}
+	}
+
+	/**
+	 * print the node and, recursively, its children.
+	 * 
+	 * @param node
+	 *            node to print
+	 * @param depth
+	 *            depth of the node; the line is indented by depth + 1 steps
+	 *            of two spaces
+	 */
+	private static void printTree(Node node, int depth) {
+		for (int i = 0; i <= depth; i++) {
+			System.out.print("  ");
+		}
+		System.out.println(node);
+		for (Node child : node.getChildren()) {
+			printTree(child, depth + 1);
+		}
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,135 @@
+package org.apache.ctakes.ytex.kernel.evaluator;
+
+import java.util.Map;
+
+import org.apache.ctakes.ytex.kernel.tree.Node;
+import org.springframework.beans.factory.InitializingBean;
+
+
+/**
+ * weighted polynomial mixing kernel: <code>
+ * 	(\sum w_i * k(x_i, y_i) \div \sum w_i ) ^ l 
+ * </code>
+ * <ul>
+ * <li>For each configured index i, find the child of each node whose attribute
+ * equals i (x_i, y_i) and apply the delegate kernel to that pair
+ * <li>Multiply the result by the weight (w_i * k(x_i,y_i))
+ * <li>Sum everything up, raise the sum to the power l
+ * <li>Divide by the sum of the weights raised to the power l
+ * </ul>
+ * 
+ * 
+ * {@link #pow} the power to raise things to
+ * <p/>
+ * {@link #attributeKey} the children of this node form a 'vector'. This is the
+ * attribute that we use to 'index' this vector
+ * <p/>
+ * {@link #mapIndexWeight} the indices (i.e. the value of
+ * <code>child.getValue().get(attributeKey)</code> and the corresponding
+ * weights. The indices must be integers: the attribute value of the child is
+ * cast to Integer, so take care that the numeric types match.
+ * <p/>
+ * {@link #delegateKernel} the kernel to apply to pairs of children.
+ * 
+ * @author vijay
+ * 
+ */
+public class WeightedPolynomialMixingKernel implements Kernel, InitializingBean {
+
+	private int pow = 1;
+	private String attributeKey;
+	private Map<Integer, Double> mapIndexWeight;
+	private Kernel delegateKernel;
+	/** (sum of weights) ^ pow; computed in {@link #afterPropertiesSet()} */
+	private double scalingFactor;
+
+	public int getPow() {
+		return this.pow;
+	}
+
+	public void setPow(int pow) {
+		this.pow = pow;
+	}
+
+	public String getAttributeKey() {
+		return this.attributeKey;
+	}
+
+	public void setAttributeKey(String attributeKey) {
+		this.attributeKey = attributeKey;
+	}
+
+	public Map<Integer, Double> getMapIndexWeight() {
+		return this.mapIndexWeight;
+	}
+
+	public void setMapIndexWeight(Map<Integer, Double> mapIndexWeight) {
+		this.mapIndexWeight = mapIndexWeight;
+	}
+
+	public Kernel getDelegateKernel() {
+		return this.delegateKernel;
+	}
+
+	public void setDelegateKernel(Kernel delegateKernel) {
+		this.delegateKernel = delegateKernel;
+	}
+
+	/**
+	 * @return 0 unless both objects are nodes and the weighted sum is
+	 *         non-zero; else the weighted sum raised to pow, divided by the
+	 *         scaling factor
+	 */
+	@Override
+	public double evaluate(Object o1, Object o2) {
+		// both objects must be nodes
+		if (!(o1 instanceof Node) || !(o2 instanceof Node)) {
+			return 0;
+		}
+		Node left = (Node) o1;
+		Node right = (Node) o2;
+		double weightedSum = 0;
+		// walk the 'indices' together with their weights
+		for (Map.Entry<Integer, Double> entry : mapIndexWeight.entrySet()) {
+			Node leftPart = getNodeForIndex(entry.getKey(), left);
+			Node rightPart = getNodeForIndex(entry.getKey(), right);
+			if (leftPart == null || rightPart == null) {
+				// index missing on one side - contributes nothing
+				continue;
+			}
+			weightedSum += (delegateKernel.evaluate(leftPart, rightPart) * entry
+					.getValue());
+		}
+		if (weightedSum == 0) {
+			return 0;
+		}
+		// raise to the power, divide by the scaling factor
+		return Math.pow(weightedSum, pow) / scalingFactor;
+	}
+
+	/**
+	 * @param index
+	 *            the attribute has to match this
+	 * @param o1
+	 *            the node whose children we're going to search
+	 * @return first child whose attribute equals index, else null
+	 */
+	private Node getNodeForIndex(int index, Node o1) {
+		for (Node child : o1.getChildren()) {
+			Integer childIndex = (Integer) child.getValue().get(attributeKey);
+			if (childIndex == null) {
+				continue;
+			}
+			if (childIndex.intValue() == index) {
+				return child;
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * precompute the scaling factor - we will always divide by this
+	 */
+	@Override
+	public void afterPropertiesSet() throws Exception {
+		double weightSum = 0d;
+		for (double w : this.mapIndexWeight.values()) {
+			weightSum += w;
+		}
+		this.scalingFactor = Math.pow(weightSum, this.pow);
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,92 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+
+/**
+ * Base class for similarity metrics: holds the ConceptSimilarityService and
+ * helpers that fill a SimilarityInfo with lcs information on demand.
+ */
+public abstract class BaseSimilarityMetric implements SimilarityMetric {
+
+	protected ConceptSimilarityService simSvc;
+
+	public BaseSimilarityMetric(ConceptSimilarityService simSvc) {
+		this.simSvc = simSvc;
+	}
+
+	public ConceptSimilarityService getConceptSimilarityService() {
+		return this.simSvc;
+	}
+
+	public void setConceptSimilarityService(
+			ConceptSimilarityService conceptSimilarityService) {
+		this.simSvc = conceptSimilarityService;
+	}
+
+	/**
+	 * ask the similarity service for the lcses of the concept pair and store
+	 * the returned distance in simInfo - skipped if simInfo already has an
+	 * lcs distance.
+	 * 
+	 * @param concept1
+	 * @param concept2
+	 * @param simInfo
+	 *            its lcs and lcs path collections are passed to the service;
+	 *            its lcs distance is set
+	 */
+	protected void initLCSes(String concept1, String concept2,
+			SimilarityInfo simInfo) {
+		if (simInfo.getLcsDist() != null) {
+			// already done
+			return;
+		}
+		simInfo.setLcsDist(simSvc.getLCS(concept1, concept2,
+				simInfo.getLcses(), simInfo.getLcsPaths()));
+	}
+
+	/**
+	 * return the information content of the best lcs, asking the similarity
+	 * service for the best lcs and storing it in simInfo if simInfo doesn't
+	 * have the value yet.
+	 * 
+	 * @param conceptFilter
+	 * @param simInfo
+	 * @param intrinsicIC
+	 *            set to false for corpus based ic
+	 * @return the lcs ic; 0 (not stored in simInfo) if the service finds no
+	 *         best lcs
+	 */
+	protected double initLcsIC(Map<String, Double> conceptFilter,
+			SimilarityInfo simInfo, boolean intrinsicIC) {
+		Double knownIC = intrinsicIC ? simInfo.getIntrinsicLcsIC() : simInfo
+				.getCorpusLcsIC();
+		if (knownIC != null) {
+			return knownIC;
+		}
+		Object[] best = simSvc.getBestLCS(simInfo.getLcses(), intrinsicIC,
+				conceptFilter);
+		if (best == null) {
+			return 0d;
+		}
+		// element 0 is the lcs, element 1 its information content
+		String bestLcs = (String) best[0];
+		Double bestIC = (Double) best[1];
+		if (intrinsicIC) {
+			simInfo.setIntrinsicLcs(bestLcs);
+			simInfo.setIntrinsicLcsIC(bestIC);
+		} else {
+			simInfo.setCorpusLcs(bestLcs);
+			simInfo.setCorpusLcsIC(bestIC);
+		}
+		return bestIC;
+	}
+
+	/**
+	 * call initLCSes and initLcsIC
+	 * 
+	 * @param concept1
+	 * @param concept2
+	 * @param conceptFilter
+	 * @param simInfo
+	 * @param intrinsicIC
+	 * @return the value of initLcsIC
+	 */
+	protected double initLcsIC(String concept1, String concept2,
+			Map<String, Double> conceptFilter, SimilarityInfo simInfo,
+			boolean intrinsicIC) {
+		initLCSes(concept1, concept2, simInfo);
+		return initLcsIC(conceptFilter, simInfo, intrinsicIC);
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,71 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+/**
+ * Per-concept information: id, depth, corpus information content and
+ * intrinsic information content.
+ * <p>
+ * We run into out of memory errors when preloading the intrinsic ic for large
+ * concept graphs, so the depth is 'compressed' a tiny bit by storing it as a
+ * short instead of an int. Depths that do not fit into a short are rejected
+ * with an {@link IllegalArgumentException} instead of being silently wrapped
+ * into a wrong (possibly negative) value.
+ * <p>
+ * Tried using float instead of double, but didn't get into the under 1gb range
+ * for very large concept graphs, so just use double to avoid precision errors.
+ * 
+ * @author vijay
+ * 
+ */
+public class ConceptInfo {
+	private String conceptId;
+	/** stored as short to save memory; see class comment */
+	private short depth;
+	private double corpusIC;
+	private double intrinsicIC;
+
+	/**
+	 * Create an empty instance: null id, depth 0, both ic values 0.
+	 */
+	public ConceptInfo() {
+	}
+
+	/**
+	 * @param conceptId
+	 *            concept identifier
+	 * @param depth
+	 *            depth of the concept; must fit into a short
+	 * @param corpusIC
+	 *            corpus based information content
+	 * @param intrinsicIC
+	 *            intrinsic information content
+	 * @throws IllegalArgumentException
+	 *             if depth is outside the range of a short
+	 */
+	public ConceptInfo(String conceptId, int depth, double corpusIC,
+			double intrinsicIC) {
+		this.conceptId = conceptId;
+		this.depth = toShortDepth(depth);
+		this.corpusIC = corpusIC;
+		this.intrinsicIC = intrinsicIC;
+	}
+
+	/**
+	 * Narrow the depth to a short, refusing values that would overflow.
+	 */
+	private static short toShortDepth(int depth) {
+		if (depth < Short.MIN_VALUE || depth > Short.MAX_VALUE)
+			throw new IllegalArgumentException(
+					"depth does not fit into a short: " + depth);
+		return (short) depth;
+	}
+
+	public String getConceptId() {
+		return conceptId;
+	}
+
+	public void setConceptId(String conceptId) {
+		this.conceptId = conceptId;
+	}
+
+	public int getDepth() {
+		return depth;
+	}
+
+	/**
+	 * @param depth
+	 *            depth of the concept; must fit into a short
+	 * @throws IllegalArgumentException
+	 *             if depth is outside the range of a short
+	 */
+	public void setDepth(int depth) {
+		this.depth = toShortDepth(depth);
+	}
+
+	public double getCorpusIC() {
+		return corpusIC;
+	}
+
+	public void setCorpusIC(double corpusIC) {
+		this.corpusIC = corpusIC;
+	}
+
+	public double getIntrinsicIC() {
+		return intrinsicIC;
+	}
+
+	public void setIntrinsicIC(double intrinsicIC) {
+		this.intrinsicIC = intrinsicIC;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,99 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.io.Serializable;
+
+import javax.xml.bind.annotation.XmlAttribute;
+
+/**
+ * pair of concepts. used to submit a set of concepts to the similarity service
+ * to compute pairwise similarity.
+ * <p>
+ * Either concept may be null (the no-argument constructor leaves both null).
+ * {@link #equals(Object)}, {@link #hashCode()} and
+ * {@link #compareTo(ConceptPair)} all tolerate null concepts; in the ordering a
+ * null concept sorts before any non-null concept.
+ * 
+ * @author vijay
+ * 
+ */
+public class ConceptPair implements Serializable, Comparable<ConceptPair> {
+	private static final long serialVersionUID = 1L;
+	private String concept1;
+	private String concept2;
+
+	@XmlAttribute
+	public String getConcept1() {
+		return concept1;
+	}
+
+	public void setConcept1(String concept1) {
+		this.concept1 = concept1;
+	}
+
+	@XmlAttribute
+	public String getConcept2() {
+		return concept2;
+	}
+
+	public void setConcept2(String concept2) {
+		this.concept2 = concept2;
+	}
+
+	public ConceptPair(String concept1, String concept2) {
+		this.concept1 = concept1;
+		this.concept2 = concept2;
+	}
+
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result
+				+ ((concept1 == null) ? 0 : concept1.hashCode());
+		result = prime * result
+				+ ((concept2 == null) ? 0 : concept2.hashCode());
+		return result;
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null || getClass() != obj.getClass())
+			return false;
+		ConceptPair other = (ConceptPair) obj;
+		return sameConcept(concept1, other.concept1)
+				&& sameConcept(concept2, other.concept2);
+	}
+
+	/**
+	 * null-safe string equality
+	 */
+	private static boolean sameConcept(String a, String b) {
+		return a == null ? b == null : a.equals(b);
+	}
+
+	/**
+	 * null-safe string comparison; null sorts before any non-null value
+	 */
+	private static int compareConcept(String a, String b) {
+		if (a == null)
+			return b == null ? 0 : -1;
+		if (b == null)
+			return 1;
+		return a.compareTo(b);
+	}
+
+	@Override
+	public String toString() {
+		return "ConceptPair [concept1=" + concept1 + ", concept2=" + concept2
+				+ "]";
+	}
+
+	public ConceptPair() {
+	}
+
+	/**
+	 * compare concept 1, then concept 2. Null concepts are allowed and sort
+	 * first, so the ordering agrees with {@link #equals(Object)} for pairs
+	 * that have not been fully populated.
+	 */
+	@Override
+	public int compareTo(ConceptPair other) {
+		int c1 = compareConcept(getConcept1(), other.getConcept1());
+		if (c1 != 0)
+			return c1;
+		return compareConcept(getConcept2(), other.getConcept2());
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,51 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.io.Serializable;
+import java.util.List;
+
+import javax.xml.bind.annotation.XmlAttribute;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+
+/**
+ * Bean holding the result of a similarity computation for one concept pair:
+ * the pair itself, a list of similarity values and an optional
+ * {@link SimilarityInfo}. Annotated for JAXB with root element name
+ * <code>conceptPairSimilarity</code>.
+ */
+@XmlRootElement(name = "conceptPairSimilarity")
+public class ConceptPairSimilarity implements Serializable {
+	/**
+	 * serialization version
+	 */
+	private static final long serialVersionUID = 1L;
+	private ConceptPair conceptPair;
+
+	// NOTE(review): presumably one value per requested metric, in request
+	// order - the command line driver prints them under the metric header
+	// columns in iteration order; confirm against the service implementation
+	private List<Double> similarities;
+	private SimilarityInfo similarityInfo;
+
+	/**
+	 * no-argument constructor; all fields start out null
+	 */
+	public ConceptPairSimilarity() {
+		super();
+	}
+
+	/**
+	 * @return the concept pair, mapped as an xml element
+	 */
+	@XmlElement
+	public ConceptPair getConceptPair() {
+		return conceptPair;
+	}
+
+	/**
+	 * @return the similarity values, mapped as an xml attribute
+	 */
+	@XmlAttribute
+	public List<Double> getSimilarities() {
+		return similarities;
+	}
+
+	/**
+	 * @return the similarity info, mapped as an xml element; may be null
+	 */
+	@XmlElement
+	public SimilarityInfo getSimilarityInfo() {
+		return similarityInfo;
+	}
+
+	public void setConceptPair(ConceptPair conceptPair) {
+		this.conceptPair = conceptPair;
+	}
+
+	public void setSimilarities(List<Double> similarities) {
+		this.similarities = similarities;
+	}
+
+	public void setSimilarityInfo(SimilarityInfo similarityInfo) {
+		this.similarityInfo = similarityInfo;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,186 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.util.BitSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
+
+
+/**
+ * Service for computing similarity between concepts of a concept graph.
+ */
+public interface ConceptSimilarityService {
+
+	/**
+	 * Supported similarity metrics. Each constant carries two flags, passed
+	 * to the constructor in the order (intrinsicIC, corpusIC).
+	 */
+	public enum SimilarityMetricEnum {
+		LCH(false, false), INTRINSIC_LCH(true, false), LIN(false, true), INTRINSIC_LIN(
+				true, false), PATH(false, false), INTRINSIC_PATH(true, false), JACCARD(
+				true, false), SOKAL(true, false), RADA(false, false), INTRINSIC_RADA(
+				true, false), WUPALMER(false, false), PAGERANK(false, false);
+		boolean intrinsicIC = false;
+		boolean corpusIC = false;
+
+		/**
+		 * is this measure taxonomy based? True when neither the intrinsicIC
+		 * nor the corpusIC flag is set.
+		 * 
+		 * @return true if both flags are false
+		 */
+		public boolean isTaxonomy() {
+			return !intrinsicIC && !corpusIC;
+		}
+
+		/**
+		 * is this measure based on intrinsic IC?
+		 * 
+		 * @return the intrinsicIC flag
+		 */
+		public boolean isIntrinsicIC() {
+			return intrinsicIC;
+		}
+
+		/**
+		 * is this measure based on corpus IC?
+		 * 
+		 * @return the corpusIC flag
+		 */
+		public boolean isCorpusIC() {
+			return corpusIC;
+		}
+
+		/**
+		 * @param intrinsicIC
+		 *            value for the intrinsicIC flag
+		 * @param corpusIC
+		 *            value for the corpusIC flag
+		 */
+		SimilarityMetricEnum(boolean intrinsicIC, boolean corpusIC) {
+			this.intrinsicIC = intrinsicIC;
+			this.corpusIC = corpusIC;
+		}
+	}
+
+	/**
+	 * @return name of the concept graph this service works on
+	 */
+	public String getConceptGraphName();
+
+	// public abstract double lch(String concept1, String concept2);
+
+	// public abstract double lin(String concept1, String concept2);
+
+	/**
+	 * compute the lcs(s) of two concepts together with the paths through
+	 * them. ConceptSimilarityServiceImpl#getLCS uses the return value as the
+	 * path distance and reads the lcs ids from the entries placed in lcsPath.
+	 * 
+	 * @param concept1
+	 *            first concept
+	 * @param concept2
+	 *            second concept
+	 * @param lcsPath
+	 *            list that is filled with one entry per lcs
+	 * @return distance of path through lcs
+	 */
+	public int lcs(String concept1, String concept2, List<LCSPath> lcsPath);
+
+	/**
+	 * @return the concept graph this service works on
+	 */
+	public abstract ConceptGraph getConceptGraph();
+
+	/**
+	 * cui - tui map. tuis are bitsets, indices correspond to tuis in
+	 * {@link #getTuiList()}
+	 * 
+	 * @return map of cui to tui bitset
+	 */
+	public abstract Map<String, BitSet> getCuiTuiMap();
+
+	// /**
+	// * supervised lin measure.
+	// *
+	// * @param concept1
+	// * @param concept2
+	// * @param conceptFilter
+	// * map of concept id to imputed infogain. if the concept isn't in
+	// * this map, the concepts won't be compared. null for
+	// * unsupervised lin.
+	// * @return
+	// */
+	// public abstract double filteredLin(String concept1, String concept2,
+	// Map<String, Double> conceptFilter);
+
+	/**
+	 * list of tuis that corresponds to bitset indices
+	 * 
+	 * @return list of tuis
+	 */
+	public abstract List<String> getTuiList();
+
+	/**
+	 * For the given label and cutoff, get the corresponding concepts whose
+	 * propagated ig meets the threshold. Used by lin kernel to find concepts
+	 * that actually have a non-trivial similarity
+	 * 
+	 * @param label
+	 *            label
+	 * @param rankCutoff
+	 *            cutoff
+	 * @param conceptFilter
+	 *            map to fill with concepts
+	 * @return double minimum evaluation
+	 */
+	public abstract double loadConceptFilter(String label, int rankCutoff,
+			Map<String, Double> conceptFilter);
+
+	/**
+	 * get the lcs(s) for the specified concepts
+	 * 
+	 * @param concept1
+	 *            required
+	 * @param concept2
+	 *            required
+	 * @param lcses
+	 *            required - will be filled with the lcs(s).
+	 * @param lcsPaths
+	 *            optional - will be filled with lcs and paths through the
+	 *            lcses.
+	 * @return distance of path through lcs
+	 */
+	public int getLCS(String concept1, String concept2, Set<String> lcses,
+			List<LCSPath> lcsPaths);
+
+	/**
+	 * get the best lcs
+	 * <p>
+	 * NOTE(review): the text below says the lcs with the lowest infocontent
+	 * is used, but ConceptSimilarityServiceImpl keeps the candidate with the
+	 * largest infocontent. Confirm which is intended.
+	 * 
+	 * @param lcses
+	 *            set of lcses
+	 * @param intrinsicIC
+	 *            should the intrinsic ic be used? false - use corpus-based ic.
+	 *            For multiple lcses not using concept filter, use the lcs with
+	 *            the lowest infocontent
+	 * @param conceptFilter
+	 *            limit to lcses in the concept filter. The lcs with the highest
+	 *            value will be used.
+	 * @return array with 2 entries. Entry 1 - lcs (String). Entry 2 -
+	 *         infocontent (double). Null if no lcses are in the concept filter.
+	 */
+	public Object[] getBestLCS(Set<String> lcses, boolean intrinsicIC,
+			Map<String, Double> conceptFilter);
+
+	/**
+	 * get the information content of a concept
+	 * 
+	 * @param concept
+	 *            concept id
+	 * @param intrinsicICMap
+	 *            true for intrinsic ic, false for corpus based ic
+	 * @return information content; ConceptSimilarityServiceImpl returns 0
+	 *         when none is found for the concept
+	 */
+	public abstract double getIC(String concept, boolean intrinsicICMap);
+
+	/**
+	 * compute similarity for a pair of concepts
+	 * 
+	 * @param metrics
+	 *            required, similarity metrics to compute
+	 * @param concept1
+	 *            required
+	 * @param concept2
+	 *            required
+	 * @param conceptFilter
+	 *            optional - only lcs's in this set will be used.
+	 * @param lcs
+	 *            NOTE(review): presumably requests that lcs information be
+	 *            attached to the result as its SimilarityInfo - confirm
+	 *            against the implementation
+	 * @return similarities
+	 */
+	public abstract ConceptPairSimilarity similarity(
+			List<SimilarityMetricEnum> metrics, String concept1,
+			String concept2, Map<String, Double> conceptFilter, boolean lcs);
+
+	/**
+	 * compute similarity for a list of concept pairs
+	 * 
+	 * @param conceptPairs
+	 *            required, concept pairs for which similarity should be
+	 *            computed
+	 * @param metrics
+	 *            required, similarity metrics to compute
+	 * @param conceptFilter
+	 *            optional - only lcs's in this set will be used.
+	 * @param lcs
+	 *            the command line driver in ConceptSimilarityServiceImpl
+	 *            passes its "lcs" option here and, when it is set, reads the
+	 *            SimilarityInfo of every returned element. NOTE(review):
+	 *            presumably this flag makes the service populate that info -
+	 *            confirm against the implementation
+	 * @return similarities
+	 */
+	public List<ConceptPairSimilarity> similarity(
+			List<ConceptPair> conceptPairs, List<SimilarityMetricEnum> metrics,
+			Map<String, Double> conceptFilter, boolean lcs);
+
+	/**
+	 * @param concept
+	 *            concept id
+	 * @return depth of the concept; ConceptSimilarityServiceImpl returns 0
+	 *         when the concept is not in the concept graph
+	 */
+	public abstract int getDepth(String concept);
+}
\ No newline at end of file

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,1079 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import net.sf.ehcache.Cache;
+import net.sf.ehcache.CacheManager;
+import net.sf.ehcache.Element;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.ctakes.ytex.kernel.ImputedFeatureEvaluator;
+import org.apache.ctakes.ytex.kernel.InfoContentEvaluator;
+import org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluator;
+import org.apache.ctakes.ytex.kernel.OrderedPair;
+import org.apache.ctakes.ytex.kernel.SimSvcContextHolder;
+import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
+import org.apache.ctakes.ytex.kernel.dao.ConceptDao;
+import org.apache.ctakes.ytex.kernel.model.ConcRel;
+import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
+import org.apache.ctakes.ytex.kernel.model.FeatureRank;
+import org.apache.ctakes.ytex.kernel.pagerank.PageRankService;
+import org.springframework.transaction.PlatformTransactionManager;
+import org.springframework.transaction.TransactionStatus;
+import org.springframework.transaction.support.TransactionCallback;
+import org.springframework.transaction.support.TransactionTemplate;
+
+import com.google.common.collect.ImmutableMap;
+
+
+/**
+ * compute concept similarity
+ * 
+ * @author vijay
+ * 
+ */
+public class ConceptSimilarityServiceImpl implements ConceptSimilarityService {
+	private static final Log log = LogFactory
+			.getLog(ConceptSimilarityServiceImpl.class);
+
+	/**
+	 * Render the lcs paths as <code>lcs=path</code> entries separated by
+	 * <code>|</code>.
+	 * 
+	 * @param lcsPaths
+	 *            paths to format
+	 * @return formatted paths; empty string for an empty list
+	 */
+	private static String formatPaths(List<LCSPath> lcsPaths) {
+		StringBuilder formatted = new StringBuilder();
+		// empty before the first entry, "|" before every following one
+		String separator = "";
+		for (LCSPath path : lcsPaths) {
+			formatted.append(separator);
+			formatted.append(path.getLcs()).append("=")
+					.append(path.toString());
+			separator = "|";
+		}
+		return formatted.toString();
+	}
+
+	@SuppressWarnings("static-access")
+	public static void main(String args[]) throws IOException {
+		Options options = new Options();
+		options.addOption(OptionBuilder
+				.withArgName("concepts")
+				.hasArg()
+				.withDescription(
+						"concept pairs or a file containing concept pairs.  To specify pairs on command line, separate concepts by comma, concept pairs by semicolon.  For file, separate concepts by comma or tab, each concept pair on a new line.")
+				.isRequired(true).create("concepts"));
+		options.addOption(OptionBuilder
+				.withArgName("metrics")
+				.hasArg()
+				.withDescription(
+						"comma-separated list of metrics.  Valid metrics: "
+								+ Arrays.asList(SimilarityMetricEnum.values()))
+				.isRequired(true).create("metrics"));
+		options.addOption(OptionBuilder
+				.withArgName("out")
+				.hasArg()
+				.withDescription(
+						"file to write oputput to.  if not specified, output sent to stdout.")
+				.create("out"));
+		options.addOption(OptionBuilder.withArgName("lcs")
+				.withDescription("output lcs and path for each concept pair")
+				.create("lcs"));
+		try {
+			CommandLineParser parser = new GnuParser();
+			CommandLine line = parser.parse(options, args);
+			String concepts = line.getOptionValue("concepts");
+			String metrics = line.getOptionValue("metrics");
+			String out = line.getOptionValue("out");
+			boolean lcs = line.hasOption("lcs");
+			PrintStream os = null;
+			try {
+				if (out != null) {
+					os = new PrintStream(new BufferedOutputStream(
+							new FileOutputStream(out)));
+				} else {
+					os = System.out;
+				}
+				List<ConceptPair> conceptPairs = parseConcepts(concepts);
+				List<SimilarityMetricEnum> metricList = parseMetrics(metrics);
+				ConceptSimilarityService simSvc = SimSvcContextHolder
+						.getApplicationContext().getBean(
+								ConceptSimilarityService.class);
+				List<SimilarityInfo> simInfos = lcs ? new ArrayList<SimilarityInfo>(
+						conceptPairs.size()) : null;
+				List<ConceptPairSimilarity> conceptSimMap = simSvc.similarity(
+						conceptPairs, metricList, null, lcs);
+				printSimilarities(conceptPairs, conceptSimMap, metricList,
+						simInfos, lcs, os);
+				// try {
+				// Thread.sleep(60*1000);
+				// } catch (InterruptedException e) {
+				// e.printStackTrace();
+				// }
+			} finally {
+				if (out != null) {
+					try {
+						os.close();
+					} catch (Exception e) {
+					}
+				}
+			}
+		} catch (ParseException pe) {
+			HelpFormatter formatter = new HelpFormatter();
+			formatter.printHelp(
+					"java " + ConceptSimilarityServiceImpl.class.getName()
+							+ " get concept similiarity", options);
+		}
+	}
+
+	/**
+	 * Parse concept pairs from a file or directly from the argument.
+	 * <p>
+	 * If the argument names an existing file, the file is read, otherwise the
+	 * argument itself is parsed. Every line may hold several pairs separated
+	 * by semicolon; the two concepts of a pair are separated by comma or tab.
+	 * Whitespace around concepts is removed and blank entries (empty lines,
+	 * trailing semicolons) are skipped silently. Entries that do not consist
+	 * of exactly two non-empty concepts are reported on stderr and skipped.
+	 * 
+	 * @param concepts
+	 *            file name or concept pair specification
+	 * @return parsed concept pairs, in input order
+	 * @throws IOException
+	 *             if the file cannot be read
+	 */
+	private static List<ConceptPair> parseConcepts(String concepts)
+			throws IOException {
+		BufferedReader r = null;
+		try {
+			List<ConceptPair> conceptPairs = new ArrayList<ConceptPair>();
+			File f = new File(concepts);
+			if (f.exists()) {
+				r = new BufferedReader(new FileReader(f));
+			} else {
+				r = new BufferedReader(new StringReader(concepts));
+			}
+			String line = null;
+			while ((line = r.readLine()) != null) {
+				// for command line, split pairs by semicolon
+				for (String subline : line.split(";")) {
+					// blank lines / trailing semicolons are not errors
+					if (subline.trim().length() == 0)
+						continue;
+					String pair[] = subline.split(",|\\t");
+					String c1 = pair.length == 2 ? pair[0].trim() : "";
+					String c2 = pair.length == 2 ? pair[1].trim() : "";
+					if (c1.length() == 0 || c2.length() == 0) {
+						System.err.println("cannot parse concept pair: "
+								+ subline);
+					} else {
+						conceptPairs.add(new ConceptPair(c1, c2));
+					}
+				}
+			}
+			return conceptPairs;
+		} finally {
+			if (r != null)
+				r.close();
+		}
+	}
+
+	/**
+	 * Parse a comma-separated list of metric names.
+	 * <p>
+	 * Names are trimmed and must match a {@link SimilarityMetricEnum}
+	 * constant exactly. <code>valueOf</code> signals an unknown name by
+	 * throwing IllegalArgumentException (it never returns null), so that
+	 * exception is caught here: the offending name is reported on stderr and
+	 * skipped.
+	 * 
+	 * @param metrics
+	 *            comma-separated metric names
+	 * @return the valid metrics, in input order
+	 */
+	private static List<SimilarityMetricEnum> parseMetrics(String metrics) {
+		List<SimilarityMetricEnum> metricList = new ArrayList<SimilarityMetricEnum>();
+		for (String metric : metrics.split(",")) {
+			String name = metric.trim();
+			try {
+				metricList.add(SimilarityMetricEnum.valueOf(name));
+			} catch (IllegalArgumentException iae) {
+				System.err.println("invalid metric: " + name);
+			}
+		}
+		return metricList;
+	}
+
+	/**
+	 * Write the similarities as a tab separated table: a header row followed
+	 * by one row per element of conceptSimList.
+	 * 
+	 * @param conceptPairs
+	 *            not used by this method
+	 * @param conceptSimList
+	 *            results to print
+	 * @param metricList
+	 *            metrics, printed as column headers
+	 * @param simInfos
+	 *            not used by this method
+	 * @param lcs
+	 *            if true, lcs columns are appended, read from the
+	 *            SimilarityInfo of each result
+	 * @param os
+	 *            stream to print to
+	 */
+	private static void printSimilarities(List<ConceptPair> conceptPairs,
+			List<ConceptPairSimilarity> conceptSimList,
+			List<SimilarityMetricEnum> metricList,
+			List<SimilarityInfo> simInfos, boolean lcs, PrintStream os) {
+		// header row
+		os.print("Concept 1\tConcept 2");
+		for (SimilarityMetricEnum metric : metricList)
+			os.print("\t" + metric);
+		if (lcs)
+			os.print("\tlcs(s)\tcorpus lcs\tintrinsic lcs\tpaths");
+		os.println();
+		// one row per concept pair
+		for (ConceptPairSimilarity pairSim : conceptSimList) {
+			ConceptPair pair = pairSim.getConceptPair();
+			os.print(pair.getConcept1() + "\t" + pair.getConcept2());
+			for (Double sim : pairSim.getSimilarities()) {
+				String cell;
+				if (sim == null)
+					cell = String.valueOf(0d);
+				else
+					cell = String.format("%6f", sim);
+				os.print("\t" + cell);
+			}
+			if (lcs) {
+				SimilarityInfo info = pairSim.getSimilarityInfo();
+				os.print("\t");
+				// all lcses, separated by |
+				String separator = "";
+				for (String lcsId : info.getLcses()) {
+					os.print(separator);
+					os.print(lcsId);
+					separator = "|";
+				}
+				os.print("\t");
+				String corpusLcs = info.getCorpusLcs();
+				if (corpusLcs != null)
+					os.print(corpusLcs);
+				os.print("\t");
+				String intrinsicLcs = info.getIntrinsicLcs();
+				if (intrinsicLcs != null)
+					os.print(intrinsicLcs);
+				os.print("\t");
+				os.print(formatPaths(info.getLcsPaths()));
+			}
+			os.println();
+		}
+	}
+
+	private CacheManager cacheManager;
+
+	private ConceptGraph cg = null;
+
+	private ClassifierEvaluationDao classifierEvaluationDao;
+
+	private ConceptDao conceptDao;
+	private String conceptGraphName;
+
+	private String conceptSetName;
+
+	// /**
+	// * information concept cache
+	// */
+	// private Map<String, Double> corpusICMap = null;
+
+	private String corpusName;
+
+	private Map<String, BitSet> cuiTuiMap;
+
+	// private Map<String, ConceptInfo> conceptInfoMap = null;
+	// private ConceptInfo[] conceptInfoCache;
+
+	/**
+	 * cache to hold lcs's
+	 */
+	private Cache lcsCache;
+	private String lcsImputedType = ImputedFeatureEvaluator.MeasureType.INFOGAIN
+			.getName();
+	private PageRankService pageRankService;
+
+	private boolean preload = true;
+	private Map<String, Double> corpusICMap;
+
+	private Map<SimilarityMetricEnum, SimilarityMetric> similarityMetricMap = null;
+
+	private PlatformTransactionManager transactionManager;
+
+	private List<String> tuiList;
+
+	/**
+	 * Record that the cui has the given tui.
+	 * 
+	 * @param cuiTuiMap
+	 *            cui to set-of-tuis map that is updated
+	 * @param tuiMap
+	 *            maps each tui string to the one instance that is stored in
+	 *            the sets, so equal tuis share a single String object
+	 * @param cui
+	 *            concept id
+	 * @param tui
+	 *            tui to add for the concept
+	 */
+	private void addCuiTuiToMap(Map<String, Set<String>> cuiTuiMap,
+			Map<String, String> tuiMap, String cui, String tui) {
+		// get 'the' tui string
+		String sharedTui;
+		if (!tuiMap.containsKey(tui)) {
+			tuiMap.put(tui, tui);
+			sharedTui = tui;
+		} else {
+			sharedTui = tuiMap.get(tui);
+		}
+		Set<String> tuisOfCui = cuiTuiMap.get(cui);
+		if (tuisOfCui == null) {
+			// first tui seen for this cui
+			tuisOfCui = new HashSet<String>();
+			cuiTuiMap.put(cui, tuisOfCui);
+		}
+		tuisOfCui.add(sharedTui);
+	}
+
+	// /**
+	// * return lin measure. optionally filter lin measure so that only concepts
+	// * that have an lcs that is relevant to the classification task have a
+	// * non-zero lin measure.
+	// *
+	// * relevant concepts are those whose evaluation wrt the label exceeds a
+	// * threshold.
+	// *
+	// * @param concept1
+	// * @param concept2
+	// * @param label
+	// * if not null, then filter lcses.
+	// * @param lcsMinEvaluation
+	// * if gt; 0, then filter lcses. this is the threshold.
+	// * @return 0 - no lcs, or no lcs that meets the threshold.
+	// */
+	// @Override
+	// public double filteredLin(String concept1, String concept2,
+	// Map<String, Double> conceptFilter) {
+	// double ic1 = getIC(concept1);
+	// double ic2 = getIC(concept2);
+	// // lin not defined if one of the concepts doesn't exist in the corpus
+	// if (ic1 == 0 || ic2 == 0)
+	// return 0;
+	// double denom = getIC(concept1) + getIC(concept2);
+	// if (denom != 0) {
+	// ConcRel cr1 = cg.getConceptMap().get(concept1);
+	// ConcRel cr2 = cg.getConceptMap().get(concept2);
+	// if (cr1 != null && cr2 != null) {
+	// Set<String> lcses = new HashSet<String>();
+	// int dist = getLCSFromCache(cr1, cr2, lcses);
+	// if (dist > 0) {
+	// double ic = getBestIC(lcses, conceptFilter);
+	// return 2 * ic / denom;
+	// }
+	// }
+	// }
+	// return 0;
+	// }
+
+	// /**
+	// * get the information content for the concept with the highest evaluation
+	// * greater than a specified threshold.
+	// *
+	// * If threshold 0, get the lowest IC of all the lcs's.
+	// *
+	// * @param lcses
+	// * the least common subsumers of a pair of concepts
+	// * @param label
+	// * label against which feature was evaluated
+	// * @param lcsMinEvaluation
+	// * threshold that the feature has to exceed. 0 for no filtering.
+	// * @return 0 if no lcs that makes the cut. else find the lcs(es) with the
+	// * maximal evaluation, and return getIC on these lcses.
+	// *
+	// * @see #getIC(Iterable)
+	// */
+	// private double getBestIC(Set<String> lcses,
+	// Map<String, Double> conceptFilter) {
+	// if (conceptFilter != null) {
+	// double currentBest = -1;
+	// Set<String> bestLcses = new HashSet<String>();
+	// for (String lcs : lcses) {
+	// if (conceptFilter.containsKey(lcs)) {
+	// double lcsEval = conceptFilter.get(lcs);
+	// if (currentBest == -1 || lcsEval > currentBest) {
+	// bestLcses.clear();
+	// bestLcses.add(lcs);
+	// currentBest = lcsEval;
+	// } else if (currentBest == lcsEval) {
+	// bestLcses.add(lcs);
+	// }
+	// }
+	// }
+	// if (bestLcses.size() > 0) {
+	// return this.getIC(bestLcses);
+	// }
+	// } else {
+	// // unfiltered - get the lowest ic
+	// return this.getIC(lcses);
+	// }
+	// return 0;
+	// }
+
+	// private ConceptInfo getPreloadedConceptInfo(String conceptId) {
+	// ConcRel cr = cg.getConceptMap().get(conceptId);
+	// if (cr != null) {
+	// return this.conceptInfoCache[cr.getNodeIndex()];
+	// }
+	// return null;
+	// }
+
+	/**
+	 * Select the best lcs from a set of candidates.
+	 * <p>
+	 * With a concept filter, only lcses that have a (non-null) value in the
+	 * filter are considered, and of those only the ones with the highest
+	 * filter value. Without a filter all lcses are candidates. Among the
+	 * candidates {@link #getBestLCS(Set, Map)} picks the one with the largest
+	 * information content.
+	 * <p>
+	 * Information content is only looked up for the remaining candidates.
+	 * The highest filter value is tracked with a null "not yet set" marker
+	 * rather than a numeric sentinel, so negative filter values are ranked
+	 * correctly.
+	 * 
+	 * @param lcses
+	 *            candidate lcses
+	 * @param intrinsicIC
+	 *            true to use intrinsic ic, false to use corpus based ic
+	 * @param conceptFilter
+	 *            optional map of concept id to evaluation; null for no
+	 *            filtering
+	 * @return array with 2 entries: lcs (String) and information content
+	 *         (Double). Null if a filter is given and no lcs is in it.
+	 */
+	@Override
+	public Object[] getBestLCS(Set<String> lcses, boolean intrinsicIC,
+			Map<String, Double> conceptFilter) {
+		Set<String> candidates = lcses;
+		if (conceptFilter != null) {
+			candidates = new HashSet<String>();
+			Double bestEval = null;
+			for (String lcs : lcses) {
+				Double lcsEval = conceptFilter.get(lcs);
+				// not in the filter (or no value): not a candidate
+				if (lcsEval == null)
+					continue;
+				if (bestEval == null
+						|| lcsEval.doubleValue() > bestEval.doubleValue()) {
+					candidates.clear();
+					candidates.add(lcs);
+					bestEval = lcsEval;
+				} else if (lcsEval.doubleValue() == bestEval.doubleValue()) {
+					candidates.add(lcs);
+				}
+			}
+			if (candidates.isEmpty()) {
+				// no lcses made the cut
+				return null;
+			}
+		}
+		Map<String, Double> lcsICMap = new HashMap<String, Double>(
+				candidates.size());
+		for (String lcs : candidates) {
+			lcsICMap.put(lcs, getIC(lcs, intrinsicIC));
+		}
+		return this.getBestLCS(candidates, lcsICMap);
+	}
+
+	/**
+	 * Load the information content feature ranks for the given concepts
+	 * through the classifierEvaluationDao.
+	 * 
+	 * @param lcses
+	 *            concepts to look up; null or empty yields an empty map
+	 * @param intrinsicIC
+	 *            true to query the intrinsic infocontent (corpus name and
+	 *            concept set name are then passed as null), false to query
+	 *            the corpus infocontent
+	 * @return map of concept id to feature rank as returned by the dao
+	 */
+	private Map<String, FeatureRank> getICOnDemand(Set<String> lcses,
+			boolean intrinsicIC) {
+		boolean nothingToLoad = lcses == null || lcses.isEmpty();
+		if (nothingToLoad)
+			return new HashMap<String, FeatureRank>(0);
+		String corpus;
+		String conceptSet;
+		String evaluationType;
+		if (intrinsicIC) {
+			corpus = null;
+			conceptSet = null;
+			evaluationType = IntrinsicInfoContentEvaluator.INTRINSIC_INFOCONTENT;
+		} else {
+			corpus = this.corpusName;
+			conceptSet = this.conceptSetName;
+			evaluationType = InfoContentEvaluator.INFOCONTENT;
+		}
+		return this.classifierEvaluationDao.getFeatureRanks(lcses, corpus,
+				conceptSet, null, evaluationType, null, 0d,
+				this.getConceptGraphName());
+	}
+
+	/**
+	 * Pick the lcs with the largest information content.
+	 * 
+	 * @param lcses
+	 *            candidate lcses
+	 * @param icMap
+	 *            information content per lcs; lcses without an entry are
+	 *            ignored
+	 * @return array with 2 entries: the selected lcs (null if none was
+	 *         selected) and its information content, where a negative value
+	 *         is reported as 0
+	 */
+	public Object[] getBestLCS(Set<String> lcses, Map<String, Double> icMap) {
+		String winner = null;
+		double winnerIC = -1;
+		for (String candidate : lcses) {
+			Double candidateIC = icMap.get(candidate);
+			if (candidateIC == null)
+				continue;
+			// strictly greater: on ties the first candidate stays
+			if (candidateIC.doubleValue() > winnerIC) {
+				winnerIC = candidateIC;
+				winner = candidate;
+			}
+		}
+		double reportedIC = winnerIC < 0 ? 0d : winnerIC;
+		return new Object[] { winner, reportedIC };
+	}
+
+	// private String createKey(String c1, String c2) {
+	// if (c1.compareTo(c2) < 0) {
+	// return new StringBuilder(c1).append("-").append(c2).toString();
+	// } else {
+	// return new StringBuilder(c2).append("-").append(c1).toString();
+	// }
+	// }
+
+	/**
+	 * @return the cacheManager field
+	 */
+	public CacheManager getCacheManager() {
+		return cacheManager;
+	}
+
+	/**
+	 * @return the dao that getICOnDemand uses to load feature ranks
+	 */
+	public ClassifierEvaluationDao getClassifierEvaluationDao() {
+		return classifierEvaluationDao;
+	}
+
+	/**
+	 * @return the conceptDao field
+	 */
+	public ConceptDao getConceptDao() {
+		return conceptDao;
+	}
+
+	/**
+	 * @return the concept graph held in the cg field; null until it is set
+	 */
+	@Override
+	public ConceptGraph getConceptGraph() {
+		return cg;
+	}
+
+	/**
+	 * @return name of the concept graph; passed to the dao by getICOnDemand
+	 */
+	public String getConceptGraphName() {
+		return conceptGraphName;
+	}
+
+	/**
+	 * @return concept set name; getICOnDemand passes it to the dao for corpus
+	 *         based ic
+	 */
+	public String getConceptSetName() {
+		return conceptSetName;
+	}
+
+	/**
+	 * @return corpus name; getICOnDemand passes it to the dao for corpus
+	 *         based ic
+	 */
+	public String getCorpusName() {
+		return corpusName;
+	}
+
+	/**
+	 * @return the cuiTuiMap field
+	 */
+	@Override
+	public Map<String, BitSet> getCuiTuiMap() {
+		return cuiTuiMap;
+	}
+
+	// /**
+	// * get the concept with the lowest Information Content of all the LCSs.
+	// * Functionality copied from umls interface.
+	// *
+	// * @todo make this configurable/add a parameter - avg/min/max/median?
+	// * @param lcses
+	// * @return
+	// */
+	// public double getIC(Iterable<String> lcses) {
+	// double ic = 0;
+	// for (String lcs : lcses) {
+	// double ictmp = getIC(lcs);
+	// if (ic < ictmp)
+	// ic = ictmp;
+	// }
+	// return ic;
+	// }
+	//
+	// public double getIC(String concept1) {
+	// Double dRetVal = corpusICMap.get(concept1);
+	// if (dRetVal != null)
+	// return (double) dRetVal;
+	// else
+	// return 0;
+	// }
+
+	/**
+	 * Get the information content of a concept.
+	 * <p>
+	 * Intrinsic ic is read from the concept graph. Corpus ic is read from the
+	 * preloaded corpusICMap if preload is enabled, otherwise it is loaded on
+	 * demand via getICOnDemand.
+	 * 
+	 * @param concept
+	 *            concept id
+	 * @param intrinsicICMap
+	 *            true for intrinsic ic, false for corpus based ic
+	 * @return the information content, 0 if none is found for the concept
+	 */
+	@Override
+	public double getIC(String concept, boolean intrinsicICMap) {
+		if (intrinsicICMap) {
+			ConcRel node = this.cg.getConceptMap().get(concept);
+			if (node == null)
+				return 0d;
+			return node.getIntrinsicInfoContent();
+		}
+		if (!isPreload()) {
+			// we need to load the ic from the database on demand
+			Set<String> single = new HashSet<String>(Arrays.asList(concept));
+			Map<String, FeatureRank> ranks = getICOnDemand(single, false);
+			if (ranks.containsKey(concept))
+				return ranks.get(concept).getEvaluation();
+			return 0d;
+		}
+		// we preloaded all ic - just look in the cache
+		Double preloaded = this.corpusICMap.get(concept);
+		if (preloaded == null)
+			return 0d;
+		return preloaded;
+	}
+
+	/**
+	 * Get the depth of a concept from the concept graph.
+	 * 
+	 * @param concept
+	 *            concept id
+	 * @return depth of the concept, 0 if the concept is not in the graph
+	 */
+	@Override
+	public int getDepth(String concept) {
+		ConcRel node = this.cg.getConceptMap().get(concept);
+		return node == null ? 0 : node.getDepth();
+	}
+
+	/**
+	 * Get the lcs(s) of two concepts.
+	 * <p>
+	 * If either concept is not in the concept graph, 0 is returned and
+	 * neither lcses nor lcsPaths is modified. Otherwise lcses is cleared and
+	 * filled. Without lcsPaths the lcs cache is used; with lcsPaths the paths
+	 * are computed via {@link #lcs(String, String, List)} (paths are not
+	 * cached).
+	 * 
+	 * @param concept1
+	 *            first concept
+	 * @param concept2
+	 *            second concept
+	 * @param lcses
+	 *            filled with the lcs(s)
+	 * @param lcsPaths
+	 *            optional, filled with the paths through the lcs(s)
+	 * @return distance as reported by the cache lookup or the lcs
+	 *         computation, 0 if a concept is unknown
+	 */
+	public int getLCS(String concept1, String concept2, Set<String> lcses,
+			List<LCSPath> lcsPaths) {
+		ConcRel first = getConceptGraph().getConceptMap().get(concept1);
+		ConcRel second = getConceptGraph().getConceptMap().get(concept2);
+		if (first == null || second == null) {
+			if (log.isDebugEnabled()) {
+				if (first == null)
+					log.debug("could not find concept:" + concept1);
+				if (second == null)
+					log.debug("could not find concept:" + concept2);
+			}
+			return 0;
+		}
+		lcses.clear();
+		if (lcsPaths == null) {
+			// no need to get paths which we don't cache - look in the cache
+			return getLCSFromCache(first, second, lcses);
+		}
+		// need to get paths - compute the lcses and their paths
+		lcsPaths.clear();
+		int distance = lcs(concept1, concept2, lcsPaths);
+		for (LCSPath path : lcsPaths)
+			lcses.add(path.getLcs());
+		return distance;
+	}
+
+	/**
+	 * Get the lcs distance and lcs concept ids for a concept pair, using the
+	 * lcs cache. On a cache miss the lcs is computed with
+	 * {@link ConcRel#getLeastCommonConcept} and the result is stored in the
+	 * cache; a pair for which the computed distance is negative is cached
+	 * with a null value.
+	 * 
+	 * @param cr1
+	 *            first concept
+	 * @param cr2
+	 *            second concept
+	 * @param lcses
+	 *            set to which the lcs concept ids are added
+	 * @return the lcs distance; -1 on a cache hit for a pair that was cached
+	 *         with a null value
+	 */
+	@SuppressWarnings("unchecked")
+	private int getLCSFromCache(ConcRel cr1, ConcRel cr2, Set<String> lcses) {
+		OrderedPair<String> cacheKey = new OrderedPair<String>(
+				cr1.getConceptID(), cr2.getConceptID());
+		Element e = this.lcsCache.get(cacheKey);
+		if (e != null) {
+			// hit the cache - unpack the lcs
+			if (e.getObjectValue() == null) {
+				return -1;
+			}
+			Object[] val = (Object[]) e.getObjectValue();
+			lcses.addAll((Set<String>) val[1]);
+			return (Integer) val[0];
+		}
+		// missed the cache - compute and save the lcs
+		Object[] val = null;
+		Set<ConcRel> lcsCRSet = new HashSet<ConcRel>(2);
+		int dist = ConcRel.getLeastCommonConcept(cr1, cr2, lcsCRSet, null);
+		if (dist >= 0) {
+			// The cached set must be private to the cache: the caller owns
+			// lcses and may clear or reuse it, which would otherwise
+			// corrupt the cached entry.
+			Set<String> cachedLcses = new HashSet<String>(lcsCRSet.size());
+			for (ConcRel cr : lcsCRSet) {
+				cachedLcses.add(cr.getConceptID());
+			}
+			lcses.addAll(cachedLcses);
+			val = new Object[2];
+			val[0] = dist;
+			val[1] = cachedLcses;
+		}
+		e = new Element(cacheKey, val);
+		this.lcsCache.put(e);
+		return dist;
+	}
+
+	/**
+	 * @return the value of the {@code lcsImputedType} field, which
+	 *         {@code loadConceptFilter} uses as the prefix of the feature
+	 *         evaluation type names
+	 */
+	public String getLcsImputedType() {
+		return lcsImputedType;
+	}
+
+	/**
+	 * @return the page rank service handed to the PAGERANK metric when the
+	 *         similarity metric map is initialized
+	 */
+	public PageRankService getPageRankService() {
+		return pageRankService;
+	}
+
+	/**
+	 * @return the map of metric type to metric implementation; this is the
+	 *         internal map itself, not a copy
+	 */
+	public Map<SimilarityMetricEnum, SimilarityMetric> getSimilarityMetricMap() {
+		return similarityMetricMap;
+	}
+
+	/**
+	 * @return the transaction manager used by {@code init()} to open the
+	 *         transaction in which the concept graph is loaded
+	 */
+	public PlatformTransactionManager getTransactionManager() {
+		return transactionManager;
+	}
+
+	/**
+	 * @return the list of tuis; when filled by
+	 *         {@code initCuiTuiMapFromCorpus} it is unmodifiable and a tui's
+	 *         position in the list is its bit index in the cui-tui bitsets
+	 */
+	@Override
+	public List<String> getTuiList() {
+		return this.tuiList;
+	}
+
+	/**
+	 * Initialize the service: load the concept graph inside a new
+	 * transaction, preload the info content and cui-tui maps if preloading
+	 * is enabled, set up the similarity metrics and look up the lcs cache.
+	 * If the concept graph cannot be loaded a warning is logged and neither
+	 * the preload nor the metric initialization is performed.
+	 */
+	public void init() {
+		log.info("begin initialization for concept graph: " + conceptGraphName);
+		TransactionTemplate txTemplate = new TransactionTemplate(
+				this.transactionManager);
+		txTemplate
+				.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW);
+		TransactionCallback<Object> loadGraph = new TransactionCallback<Object>() {
+			@Override
+			public Object doInTransaction(TransactionStatus status) {
+				cg = conceptDao.getConceptGraph(conceptGraphName);
+				if (cg != null) {
+					if (isPreload()) {
+						initInfoContent();
+						initCuiTuiMapFromCorpus();
+					}
+					initSimilarityMetricMap();
+				} else {
+					log.warn("concept graph null, name: " + conceptGraphName);
+				}
+				return null;
+			}
+		};
+		txTemplate.execute(loadGraph);
+		this.lcsCache = getCacheManager().getCache("lcsCache");
+		log.info("end initialization for concept graph: " + conceptGraphName);
+	}
+
+	/**
+	 * Load the cui-tui pairs for the configured corpus, concept graph and
+	 * concept set and build from them the tui list and the map of cui to
+	 * tui bitset. The bit index of a tui is its position in the sorted tui
+	 * list.
+	 */
+	public void initCuiTuiMapFromCorpus() {
+		// tui strings are collected here so they are not duplicated (saves
+		// memory)
+		SortedMap<String, String> tuiMap = new TreeMap<String, String>();
+		Map<String, Set<String>> cuiToTuis = new HashMap<String, Set<String>>();
+		List<Object[]> rows = this.classifierEvaluationDao.getCorpusCuiTuis(
+				this.getCorpusName(), this.getConceptGraphName(),
+				this.getConceptSetName());
+		for (Object[] row : rows) {
+			// column 0 = cui, column 1 = tui
+			addCuiTuiToMap(cuiToTuis, tuiMap, (String) row[0],
+					(String) row[1]);
+		}
+		// tui -> bitset index, and the inverse: bitset index -> tui
+		SortedMap<String, Integer> tuiToIndex = new TreeMap<String, Integer>();
+		List<String> orderedTuis = new ArrayList<String>(tuiMap.size());
+		for (String tui : tuiMap.keySet()) {
+			// the next free list position is the tui's bit index
+			tuiToIndex.put(tui, orderedTuis.size());
+			orderedTuis.add(tui);
+		}
+		this.tuiList = Collections.unmodifiableList(orderedTuis);
+		// convert each cui's set of tuis into a bitset
+		ImmutableMap.Builder<String, BitSet> bitsets = new ImmutableMap.Builder<String, BitSet>();
+		for (Map.Entry<String, Set<String>> entry : cuiToTuis.entrySet()) {
+			BitSet tuiBits = tuiListToBitset(entry.getValue(), tuiToIndex);
+			bitsets.put(entry.getKey(), tuiBits);
+		}
+		this.cuiTuiMap = bitsets.build();
+	}
+
+	/**
+	 * Initialize the corpus information content map. The info content is
+	 * loaded for the configured corpus, concept graph and concept set; only
+	 * concepts that are present in the concept graph are kept, keyed by the
+	 * concept id string held by the graph node. If no info content is
+	 * available a warning is logged and the map is left empty.
+	 * 
+	 * TODO replace strings with concept ids from conceptGraph to save memory
+	 */
+	private void initInfoContent() {
+		log.info("loading corpus infocontent for corpusName=" + corpusName
+					+ ", conceptGraphName=" + conceptGraphName
+					+ ", conceptSetName=" + conceptSetName);
+		Map<String, Double> loadedIC = classifierEvaluationDao
+				.getInfoContent(corpusName, conceptGraphName,
+						this.conceptSetName);
+		ImmutableMap.Builder<String, Double> mb = new ImmutableMap.Builder<String, Double>();
+		if (loadedIC == null || loadedIC.isEmpty()) {
+			// a null result must not be iterated - fall through and publish
+			// an empty map instead of failing with a NullPointerException
+			log.warn("IC not found");
+		} else {
+			for (Map.Entry<String, Double> corpusICEntry : loadedIC.entrySet()) {
+				ConcRel cr = cg.getConceptMap().get(corpusICEntry.getKey());
+				if (cr != null) {
+					// reuse the graph node's id string as the key
+					mb.put(cr.getConceptID(), corpusICEntry.getValue());
+				}
+			}
+		}
+		this.corpusICMap = mb.build();
+	}
+
+	/**
+	 * Initialize the similarity metrics. The maximum intrinsic IC and the
+	 * maximum depth are taken from the concept graph. If the maximum depth
+	 * is positive the depth/IC based metrics are registered; otherwise only
+	 * the PAGERANK metric is registered.
+	 */
+	private void initSimilarityMetricMap() {
+		log.info("initializing similarity measures");
+		double maxIC = this.cg.getIntrinsicICMax();
+		int maxDepth = this.cg.getDepthMax();
+		Map<SimilarityMetricEnum, SimilarityMetric> metricMap = new HashMap<SimilarityMetricEnum, SimilarityMetric>(
+				SimilarityMetricEnum.values().length);
+		// publish the map before the metrics are constructed, as the metrics
+		// receive a reference to this service
+		this.similarityMetricMap = metricMap;
+		if (maxDepth <= 0) {
+			metricMap.put(SimilarityMetricEnum.PAGERANK, new PageRankMetric(
+					this, this.getPageRankService()));
+			return;
+		}
+		metricMap.put(SimilarityMetricEnum.LCH, new LCHMetric(this, maxDepth));
+		metricMap.put(SimilarityMetricEnum.LIN, new LinMetric(this, false));
+		metricMap.put(SimilarityMetricEnum.INTRINSIC_LIN, new LinMetric(this,
+				true));
+		metricMap.put(SimilarityMetricEnum.INTRINSIC_LCH,
+				new IntrinsicLCHMetric(this, maxIC));
+		metricMap.put(SimilarityMetricEnum.PATH, new PathMetric(this));
+		metricMap.put(SimilarityMetricEnum.INTRINSIC_PATH,
+				new IntrinsicPathMetric(this, maxIC));
+		metricMap.put(SimilarityMetricEnum.RADA, new RadaMetric(this,
+				maxDepth));
+		metricMap.put(SimilarityMetricEnum.INTRINSIC_RADA,
+				new IntrinsicRadaMetric(this, maxIC));
+		metricMap.put(SimilarityMetricEnum.SOKAL, new SokalSneathMetric(this));
+		metricMap.put(SimilarityMetricEnum.JACCARD, new JaccardMetric(this));
+		metricMap.put(SimilarityMetricEnum.WUPALMER, new WuPalmerMetric(this));
+	}
+
+	/**
+	 * @return true if {@code init()} should preload the info content and the
+	 *         cui-tui map
+	 */
+	public boolean isPreload() {
+		return preload;
+	}
+
+	// /*
+	// * (non-Javadoc)
+	// *
+	// * @see org.apache.ctakes.ytex.kernel.ConceptSimilarity#lch(java.lang.String,
+	// * java.lang.String)
+	// */
+	// public double lch(String concept1, String concept2) {
+	// double dm = 2 * cg.getDepthMax() + 1.0;
+	// ConcRel cr1 = cg.getConceptMap().get(concept1);
+	// ConcRel cr2 = cg.getConceptMap().get(concept2);
+	// if (cr1 != null && cr2 != null) {
+	// Set<String> lcses = new HashSet<String>();
+	// int lcsDist = getLCSFromCache(cr1, cr2, lcses);
+	// // leacock is defined as -log([path length]/(2*[depth])
+	// double lch = -Math.log(((double) lcsDist + 1.0) / dm);
+	// // scale to depth
+	// return lch / Math.log(dm);
+	// } else {
+	// if (log.isDebugEnabled()) {
+	// if (cr1 == null)
+	// log.debug("could not find concept:" + concept1);
+	// if (cr2 == null)
+	// log.debug("could not find concept:" + concept2);
+	// }
+	// return 0;
+	// }
+	// }
+
+	/**
+	 * Compute the least common subsumers of two concepts together with
+	 * their paths.
+	 * 
+	 * @param concept1
+	 *            first concept id
+	 * @param concept2
+	 *            second concept id
+	 * @param lcsPaths
+	 *            list to which the computed lcs paths are appended
+	 * @return the distance reported by
+	 *         {@link ConcRel#getLeastCommonConcept}, or -1 if either concept
+	 *         is not in the concept graph
+	 */
+	public int lcs(String concept1, String concept2, List<LCSPath> lcsPaths) {
+		ConcRel cr1 = cg.getConceptMap().get(concept1);
+		ConcRel cr2 = cg.getConceptMap().get(concept2);
+		if (cr1 == null || cr2 == null) {
+			return -1;
+		}
+		Set<ConcRel> lcsNodes = new HashSet<ConcRel>();
+		Map<ConcRel, LCSPath> pathsByLcs = new HashMap<ConcRel, LCSPath>();
+		int dist = ConcRel.getLeastCommonConcept(cr1, cr2, lcsNodes,
+				pathsByLcs);
+		lcsPaths.addAll(pathsByLcs.values());
+		return dist;
+	}
+
+	// public double lin(String concept1, String concept2) {
+	// return filteredLin(concept1, concept2, null);
+	// }
+
+	/**
+	 * For the given label and cutoff, load the imputed concepts whose
+	 * propagated evaluation meets the cutoff and put them, with their
+	 * evaluation, into the concept filter. Used by the lin kernel to find
+	 * concepts that actually have a non-trivial similarity.
+	 * 
+	 * @param label
+	 *            label
+	 * @param rankCutoff
+	 *            cutoff
+	 * @param conceptFilter
+	 *            map to fill with concept id - evaluation pairs
+	 * @return the smallest evaluation among the loaded concepts, capped at 1
+	 *         (1 is also returned when no concepts are loaded)
+	 */
+	@Override
+	public double loadConceptFilter(String label, int rankCutoff,
+			Map<String, Double> conceptFilter) {
+		String imputedType = lcsImputedType
+				+ ImputedFeatureEvaluator.SUFFIX_IMPUTED;
+		String propagatedType = lcsImputedType
+				+ ImputedFeatureEvaluator.SUFFIX_PROP;
+		List<FeatureRank> imputedConcepts = this.classifierEvaluationDao
+				.getImputedFeaturesByPropagatedCutoff(corpusName,
+						conceptSetName, label, imputedType, conceptGraphName,
+						propagatedType, rankCutoff);
+		double minEval = 1d;
+		for (FeatureRank rank : imputedConcepts) {
+			conceptFilter.put(rank.getFeatureName(), rank.getEvaluation());
+			if (rank.getEvaluation() <= minEval) {
+				minEval = rank.getEvaluation();
+			}
+		}
+		return minEval;
+	}
+
+	/**
+	 * @param cacheManager
+	 *            cache manager to store in the {@code cacheManager} field
+	 */
+	public void setCacheManager(CacheManager cacheManager) {
+		this.cacheManager = cacheManager;
+	}
+
+	/**
+	 * @param classifierEvaluationDao
+	 *            dao used to load info content, cui-tui pairs and imputed
+	 *            features
+	 */
+	public void setClassifierEvaluationDao(
+			ClassifierEvaluationDao classifierEvaluationDao) {
+		this.classifierEvaluationDao = classifierEvaluationDao;
+	}
+
+	/**
+	 * @param conceptDao
+	 *            dao from which {@code init()} loads the concept graph
+	 */
+	public void setConceptDao(ConceptDao conceptDao) {
+		this.conceptDao = conceptDao;
+	}
+
+	/**
+	 * @param conceptGraphName
+	 *            name of the concept graph that {@code init()} loads
+	 */
+	public void setConceptGraphName(String conceptGraphName) {
+		this.conceptGraphName = conceptGraphName;
+	}
+
+	/**
+	 * @param conceptSetName
+	 *            concept set name passed to the dao when loading info
+	 *            content and imputed features
+	 */
+	public void setConceptSetName(String conceptSetName) {
+		this.conceptSetName = conceptSetName;
+	}
+
+	/**
+	 * @param corpusName
+	 *            corpus name passed to the dao when loading info content and
+	 *            imputed features
+	 */
+	public void setCorpusName(String corpusName) {
+		this.corpusName = corpusName;
+	}
+
+	/**
+	 * @param lcsImputedType
+	 *            prefix that {@code loadConceptFilter} combines with the
+	 *            imputed / propagated suffixes to form the feature
+	 *            evaluation type names
+	 */
+	public void setLcsImputedType(String lcsImputedType) {
+		this.lcsImputedType = lcsImputedType;
+	}
+
+	// double minEval = 1d;
+	// List<FeatureRank> listPropagatedConcepts = classifierEvaluationDao
+	// .getTopFeatures(corpusName, conceptSetName, label,
+	// ImputedFeatureEvaluator.MeasureType.INFOGAIN.toString()
+	// + ImputedFeatureEvaluator.SUFFIX_PROP, 0, 0,
+	// conceptGraphName, rankCutoff);
+	// for (FeatureRank r : listPropagatedConcepts) {
+	// ConcRel cr = cg.getConceptMap().get(r.getFeatureName());
+	// if (cr != null) {
+	// addSubtree(conceptFilterSet, cr);
+	// }
+	// if (r.getEvaluation() < minEval)
+	// minEval = r.getEvaluation();
+	// }
+	// return minEval;
+	// }
+	//
+	// /**
+	// * add all children of parent to conceptSet. Limit only to children that
+	// * actually appear in the corpus
+	// *
+	// * @param conceptSet
+	// * set of concepts to add ids to
+	// * @param parent
+	// * parent which will be added to the conceptSet
+	// * @param corpusICSet
+	// * set of concepts and hypernyms contained in corpus
+	// */
+	// private void addSubtree(Map<String, Double> conceptSet, ConcRel parent) {
+	// if (!conceptSet.containsKey(parent.getConceptID())
+	// && conceptFreq.containsKey(parent.getConceptID())) {
+	// conceptSet.put(parent.getConceptID(), 0d);
+	// for (ConcRel child : parent.getChildren()) {
+	// addSubtree(conceptSet, child);
+	// }
+	// }
+	// }
+
+	/**
+	 * @param pageRankService
+	 *            service handed to the PAGERANK metric when the similarity
+	 *            metric map is initialized
+	 */
+	public void setPageRankService(PageRankService pageRankService) {
+		this.pageRankService = pageRankService;
+	}
+
+	/**
+	 * @param preload
+	 *            true if {@code init()} should preload the info content and
+	 *            the cui-tui map
+	 */
+	public void setPreload(boolean preload) {
+		this.preload = preload;
+	}
+
+	/**
+	 * @param similarityMetricMap
+	 *            map of metric type to metric implementation; stored as-is,
+	 *            without copying
+	 */
+	public void setSimilarityMetricMap(
+			Map<SimilarityMetricEnum, SimilarityMetric> similarityMetricMap) {
+		this.similarityMetricMap = similarityMetricMap;
+	}
+
+	/**
+	 * @param transactionManager
+	 *            transaction manager used by {@code init()} to open the
+	 *            transaction in which the concept graph is loaded
+	 */
+	public void setTransactionManager(
+			PlatformTransactionManager transactionManager) {
+		this.transactionManager = transactionManager;
+	}
+
+	/**
+	 * Compute the requested similarity metrics for every concept pair.
+	 * 
+	 * @param conceptPairs
+	 *            pairs to compare
+	 * @param metrics
+	 *            metrics to compute for each pair
+	 * @param conceptFilter
+	 *            concept filter passed through to the metrics
+	 * @param lcs
+	 *            true if lcs paths should be collected
+	 * @return one result per concept pair, in the order of the input list
+	 */
+	@Override
+	public List<ConceptPairSimilarity> similarity(
+			List<ConceptPair> conceptPairs, List<SimilarityMetricEnum> metrics,
+			Map<String, Double> conceptFilter, boolean lcs) {
+		List<ConceptPairSimilarity> results = new ArrayList<ConceptPairSimilarity>(
+				conceptPairs.size());
+		for (ConceptPair pair : conceptPairs) {
+			ConceptPairSimilarity pairResult = similarity(metrics,
+					pair.getConcept1(), pair.getConcept2(), conceptFilter,
+					lcs);
+			results.add(pairResult);
+		}
+		return results;
+	}
+
+	/**
+	 * Compute the requested similarity metrics for a single concept pair.
+	 * 
+	 * @param metrics
+	 *            metrics to compute
+	 * @param concept1
+	 *            first concept id
+	 * @param concept2
+	 *            second concept id
+	 * @param conceptFilter
+	 *            concept filter passed through to the metrics
+	 * @param lcs
+	 *            true if lcs paths should be collected in the similarity
+	 *            info
+	 * @return the pair, the similarity info, and the similarities in the
+	 *         order of <code>metrics</code>. The similarity list is empty if
+	 *         no concept graph is loaded; a metric that has not been
+	 *         registered for this concept graph yields 0.
+	 */
+	@Override
+	public ConceptPairSimilarity similarity(List<SimilarityMetricEnum> metrics,
+			String concept1, String concept2,
+			Map<String, Double> conceptFilter, boolean lcs) {
+		// similarity info shared by all metrics computed for this pair
+		SimilarityInfo simInfo = new SimilarityInfo();
+		if (lcs)
+			simInfo.setLcsPaths(new ArrayList<LCSPath>(1));
+		// allocate result list
+		List<Double> similarities = new ArrayList<Double>(metrics.size());
+		if (cg != null) {
+			// iterate over metrics, compute, append to the result list
+			for (SimilarityMetricEnum metric : metrics) {
+				SimilarityMetric metricImpl = this.similarityMetricMap == null ? null
+						: this.similarityMetricMap.get(metric);
+				double sim = 0d;
+				if (metricImpl != null) {
+					sim = metricImpl.similarity(concept1, concept2,
+							conceptFilter, simInfo);
+				} else if (log.isDebugEnabled()) {
+					// only a subset of the metrics is registered at
+					// initialization; keep the result aligned with the
+					// requested metrics instead of failing with a
+					// NullPointerException
+					log.debug("metric not available for concept graph "
+							+ conceptGraphName + ": " + metric);
+				}
+				similarities.add(sim);
+			}
+		}
+		ConceptPairSimilarity csim = new ConceptPairSimilarity();
+		csim.setConceptPair(new ConceptPair(concept1, concept2));
+		csim.setSimilarities(similarities);
+		csim.setSimilarityInfo(simInfo);
+		return csim;
+	}
+
+	/**
+	 * Convert a set of tuis into a bitset.
+	 * 
+	 * @param tuis
+	 *            tuis to encode; each must be a key of mapTuiIndex
+	 * @param mapTuiIndex
+	 *            map of tui to bit index
+	 * @return bitset in which the bit of every given tui is set
+	 */
+	private BitSet tuiListToBitset(Set<String> tuis,
+			SortedMap<String, Integer> mapTuiIndex) {
+		BitSet tuiBits = new BitSet(mapTuiIndex.size());
+		for (String tui : tuis) {
+			int bitIndex = mapTuiIndex.get(tui);
+			tuiBits.set(bitIndex);
+		}
+		return tuiBits;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,38 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.util.Map;
+
+/**
+ * compute intrinsic LCH as in eqn 28 from
+ * http://dx.doi.org/10.1016/j.jbi.2011.03.013
+ * 
+ * Scale to unit interval
+ * 
+ * @author vijay
+ * 
+ */
+public class IntrinsicLCHMetric extends BaseSimilarityMetric {
+	/**
+	 * normalizer derived from the max IC; 0 means the max IC is not
+	 * available and the metric always returns 0
+	 */
+	double logMaxIC2 = 0d;
+
+	/**
+	 * @param simSvc
+	 *            similarity service used to look up information content
+	 * @param maxIC
+	 *            maximum intrinsic information content; null or a
+	 *            non-positive value is treated as "not available"
+	 */
+	public IntrinsicLCHMetric(ConceptSimilarityService simSvc, Double maxIC) {
+		super(simSvc);
+		// a max IC of 0 would give log(0) = -Infinity as normalizer, which
+		// makes every pair come out with similarity 1 - treat it like a
+		// missing max IC instead
+		// NOTE(review): the normalizer is log(2*maxIC) + 1, not
+		// log(2*maxIC + 1) - confirm against eqn 28 of the reference
+		if (maxIC != null && maxIC.doubleValue() > 0d)
+			this.logMaxIC2 = Math.log(2 * maxIC.doubleValue()) + 1d;
+	}
+
+	/**
+	 * @return 1 - log(ic1 + ic2 - 2 * lcsIC + 1) / normalizer, using the
+	 *         intrinsic IC of both concepts and of their lcs; 0 if the max
+	 *         IC is not available
+	 */
+	@Override
+	public double similarity(String concept1, String concept2,
+			Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
+		double sim = 0d;
+		if (logMaxIC2 != 0d) {
+			double ic1 = simSvc.getIC(concept1, true);
+			double ic2 = simSvc.getIC(concept2, true);
+			double lcsIC = initLcsIC(concept1, concept2, conceptFilter,
+					simInfo, true);
+			sim = 1 - (Math.log(ic1 + ic2 - 2 * (lcsIC) + 1) / logMaxIC2);
+
+		}
+		return sim;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,35 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.util.Map;
+
+
+/**
+ * compute Intrinsic path similarity as the inverse of the IC-based distance
+ * plus one: <code>1 / (ic1 + ic2 - 2 * lcsIC + 1)</code>. The max IC is only
+ * used as an availability check: without it the metric returns 0.
+ * 
+ * @author vijay
+ * 
+ */
+public class IntrinsicPathMetric extends BaseSimilarityMetric {
+	Double maxIC;
+
+	/**
+	 * @param simSvc
+	 *            similarity service used to look up information content
+	 * @param maxIC
+	 *            maximum intrinsic information content, may be null
+	 */
+	public IntrinsicPathMetric(ConceptSimilarityService simSvc, Double maxIC) {
+		super(simSvc);
+		this.maxIC = maxIC;
+	}
+
+	/**
+	 * @return the inverse of the IC-based distance plus one; 0 if the max IC
+	 *         is null or the IC of the lcs is 0
+	 */
+	@Override
+	public double similarity(String concept1, String concept2,
+			Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
+		if (maxIC == null) {
+			return 0d;
+		}
+		double lcsIC = this.initLcsIC(concept1, concept2, conceptFilter,
+				simInfo, true);
+		if (lcsIC == 0d) {
+			return 0d;
+		}
+		double ic1 = simSvc.getIC(concept1, true);
+		double ic2 = simSvc.getIC(concept2, true);
+		// IC-based distance between the two concepts through their lcs
+		double icDistance = ic1 + ic2 - (2 * lcsIC);
+		return 1d / (icDistance + 1);
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,37 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.util.Map;
+
+
+/**
+ * compute Intrinsic rada distance as in eqn 23 from
+ * http://dx.doi.org/10.1016/j.jbi.2011.03.013. Scale the distance to the unit
+ * interval using max IC. Convert to similarity metric by taking
+ * 1-scaled_distance.
+ * 
+ * @author vijay
+ * 
+ */
+public class IntrinsicRadaMetric extends BaseSimilarityMetric {
+	Double maxIC;
+
+	/**
+	 * @param simSvc
+	 *            similarity service used to look up information content
+	 * @param maxIC
+	 *            maximum intrinsic information content; null or a
+	 *            non-positive value is treated as "not available"
+	 */
+	public IntrinsicRadaMetric(ConceptSimilarityService simSvc, Double maxIC) {
+		super(simSvc);
+		this.maxIC = maxIC;
+	}
+
+	/**
+	 * @return 1 - (ic1 + ic2 - 2 * lcsIC) / (2 * maxIC); 0 if the max IC is
+	 *         not available or the IC of the lcs is 0
+	 */
+	@Override
+	public double similarity(String concept1, String concept2,
+			Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
+		// the distance is divided by 2 * maxIC below: a max IC of 0 would
+		// produce NaN / infinite similarities, so treat it like a missing
+		// max IC
+		if (maxIC == null || maxIC.doubleValue() <= 0d)
+			return 0d;
+		double lcsIC = this.initLcsIC(concept1, concept2, conceptFilter, simInfo, true);
+		if (lcsIC == 0d)
+			return 0d;
+		double ic1 = simSvc.getIC(concept1, true);
+		double ic2 = simSvc.getIC(concept2, true);
+		// scale to unit interval
+		return 1d - (ic1 + ic2 - (2 * lcsIC)) / (2 * maxIC);
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,30 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.util.Map;
+
+
+/**
+ * Jaccard metric as in eqn 13 from http://dx.doi.org/10.1016/j.jbi.2011.03.013
+ * 
+ * @author vijay
+ * 
+ */
+public class JaccardMetric extends BaseSimilarityMetric {
+
+	/**
+	 * @param simSvc
+	 *            similarity service used to look up information content
+	 */
+	public JaccardMetric(ConceptSimilarityService simSvc) {
+		super(simSvc);
+	}
+
+	/**
+	 * @return lcsIC / (ic1 + ic2 - lcsIC) using intrinsic IC values; 0 if
+	 *         the IC of the lcs is 0
+	 */
+	@Override
+	public double similarity(String concept1, String concept2,
+			Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
+		double lcsIC = this.initLcsIC(concept1, concept2, conceptFilter,
+				simInfo, true);
+		if (lcsIC != 0d) {
+			double ic1 = simSvc.getIC(concept1, true);
+			double ic2 = simSvc.getIC(concept2, true);
+			double denominator = ic1 + ic2 - lcsIC;
+			return lcsIC / denominator;
+		}
+		return 0d;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,33 @@
+package org.apache.ctakes.ytex.kernel.metric;
+
+import java.util.Map;
+
+public class LCHMetric extends BaseSimilarityMetric {
+	/**
+	 * log(max depth * 2)
+	 */
+	double logdm = 0d;
+
+	@Override
+	public double similarity(String concept1, String concept2,
+			Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
+		if (logdm != 0d) {
+			initLCSes(concept1, concept2, simInfo);
+			if (simInfo.getLcsDist() > 0) {
+				// double lch = logdm - Math.log((double) simInfo.getLcsDist());
+				// // scale to depth
+				// return lch / logdm;
+				return 1 - (Math.log((double) simInfo.getLcsDist()) / logdm);
+			}
+		}
+		return 0d;
+	}
+
+	public LCHMetric(ConceptSimilarityService simSvc, Integer maxDepth) {
+		super(simSvc);
+		if (maxDepth != null) {
+			this.logdm = Math.log(2 * maxDepth);
+		}
+	}
+
+}



Mime
View raw message