Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 77B3910EF6 for ; Mon, 16 Dec 2013 16:34:01 +0000 (UTC) Received: (qmail 82046 invoked by uid 500); 16 Dec 2013 16:33:14 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 81788 invoked by uid 500); 16 Dec 2013 16:32:58 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 81617 invoked by uid 99); 16 Dec 2013 16:32:41 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 16 Dec 2013 16:32:41 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED,T_FRT_ADULT2 X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 16 Dec 2013 16:32:17 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 508C22388C6C; Mon, 16 Dec 2013 16:30:59 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1551254 [21/26] - in /ctakes/branches/ytex: ctakes-ytex-res/ ctakes-ytex-res/.settings/ ctakes-ytex-res/src/ ctakes-ytex-res/src/main/ ctakes-ytex-res/src/main/resources/ ctakes-ytex-res/src/main/resources/org/ ctakes-ytex-res/src/main/res... 
Date: Mon, 16 Dec 2013 16:30:40 -0000 To: commits@ctakes.apache.org From: vjapache@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20131216163059.508C22388C6C@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NormKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,114 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import net.sf.ehcache.Cache; +import net.sf.ehcache.CacheManager; +import net.sf.ehcache.Element; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.kernel.tree.Node; + + +/** + * Return norm of delegate kernel: k(x,y)/sqrt(k(x,x)*k(y,y). If + * the object is a org.apache.ctakes.ytex.kernel.Node, then save the norm in the node for future + * reference. else if cacheNorm = true, save the norm in the cache for future + * reference. If the delegate kernel is fast (e.g. it's using caching itself / + * trivial operation) caching the norm will slow things down. 
+ * + * @author vijay + * + */ +public class NormKernel implements Kernel { + private static final Log log = LogFactory.getLog(NormKernel.class); + + private Cache normCache; + private CacheManager cacheManager; + private Kernel delegateKernel; + private boolean cacheNorm = true; + + public boolean isCacheNorm() { + return cacheNorm; + } + + public void setCacheNorm(boolean cacheNorm) { + this.cacheNorm = cacheNorm; + } + + public NormKernel(Kernel delegateKernel) { + this.delegateKernel = delegateKernel; + } + + public NormKernel() { + super(); + } + + public CacheManager getCacheManager() { + return cacheManager; + } + + public void setCacheManager(CacheManager cacheManager) { + this.cacheManager = cacheManager; + } + + public Kernel getDelegateKernel() { + return delegateKernel; + } + + public void setDelegateKernel(Kernel delegateKernel) { + this.delegateKernel = delegateKernel; + } + + /** + * compute the norm. + * + * @param o1 + * @return + */ + public double getNorm(Object o1) { + Double norm = null; + if (o1 != null) { + if (o1 instanceof Node) { + // look in node if this is a node + norm = ((Node) o1).getNorm(); + } else if (this.isCacheNorm()) { + // look in cache otherwise + Element cachedNorm = null; + cachedNorm = normCache.get(o1); + if (cachedNorm != null) { + norm = (Double) cachedNorm.getValue(); + } + } + if (norm == null) { + // couldn't get cached norm - compute it + norm = Math.sqrt(delegateKernel.evaluate(o1, o1)); + } + if (o1 instanceof Node) { + ((Node) o1).setNorm(norm); + } else if (this.isCacheNorm()) { + normCache.put(new Element(o1, norm)); + } + } + return norm; + } + + public double evaluate(Object o1, Object o2) { + double d = 0; + if (o1 == null || o2 == null) { + d = 0; + } else { + double norm1 = getNorm(o1); + double norm2 = getNorm(o2); + if (norm1 != 0 && norm2 != 0) + d = delegateKernel.evaluate(o1, o2) / (norm1 * norm2); + } + if (log.isTraceEnabled()) { + log.trace("K<" + o1 + "," + o2 + "> = " + d); + } + return d; + } + 
+ public void init() { + normCache = cacheManager.getCache("normCache"); + } +} \ No newline at end of file Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ProductKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,47 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * compute the product of delegate kernels + * + * @author vijay + * + */ +public class ProductKernel extends CacheKernel { + private static final Log log = LogFactory.getLog(ProductKernel.class); + /** + * use array instead of list. when running thread dumps, see a lot of action + * in list.size(). 
may be a fluke, but can't hurt + */ + Kernel[] delegateKernels; + + public List getDelegateKernels() { + return Arrays.asList(delegateKernels); + } + + public void setDelegateKernels(List delegateKernels) { + this.delegateKernels = new Kernel[delegateKernels.size()]; + for (int i = 0; i < this.delegateKernels.length; i++) + this.delegateKernels[i] = delegateKernels.get(i); + } + + @Override + public double innerEvaluate(Object o1, Object o2) { + double d = 1; + for (Kernel k : delegateKernels) { + d *= k.evaluate(o1, o2); + if (d == 0) + break; + } + if (log.isTraceEnabled()) { + log.trace(new StringBuilder("K<").append(o1).append(",").append(o2) + .append("> = ").append(d)); + } + return d; + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticSimKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,139 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.kernel.metric.ConceptPairSimilarity; +import org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityService; +import org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityService.SimilarityMetricEnum; +import org.springframework.beans.factory.InitializingBean; + + +public class SemanticSimKernel extends CacheKernel 
implements InitializingBean { + private static final Log log = LogFactory.getLog(LinKernel.class); + private Map conceptFilter = null; + private ConceptSimilarityService conceptSimilarityService; + private double cutoff = 0; + private String label = null; + private String metricNames; + private List metrics; + private Integer rankCutoff = null; + + @Override + public void afterPropertiesSet() throws Exception { + super.afterPropertiesSet(); + this.initializeConceptFilter(); + } + + /** + * override CacheKernel - don't bother caching evaluation if the concepts + * are not in the conceptFilter, or if they are identical. + */ + @Override + public double evaluate(Object o1, Object o2) { + String c1 = (String) o1; + String c2 = (String) o2; + double d = 0; + if (c1 != null && c2 != null) { + if (c1.equals(c2)) { + d = 1d; + } else if (this.conceptFilter == null + || (conceptFilter.containsKey((String) o1) && conceptFilter + .containsKey((String) o2))) { + d = super.evaluate(o1, o2); + } + } + return d; + } + + public ConceptSimilarityService getConceptSimilarityService() { + return conceptSimilarityService; + } + + public double getCutoff() { + return cutoff; + } + + public String getLabel() { + return label; + } + + public String getMetricNames() { + return metricNames; + } + + public Integer getRankCutoff() { + return rankCutoff; + } + + protected void initializeConceptFilter() { + if (rankCutoff != null) { + conceptFilter = new HashMap(); + cutoff = conceptSimilarityService.loadConceptFilter(label, + rankCutoff, conceptFilter); + if (conceptFilter.isEmpty()) { + log.warn("no concepts that matched the threshold for supervised semantic similarity. 
label=" + + label + ", rankCutoff=" + rankCutoff); + } + } + } + + /** + * return the product of all the similarity metrics + */ + @Override + public double innerEvaluate(Object o1, Object o2) { + double d = 0; + String c1 = (String) o1; + String c2 = (String) o2; + if (c1 != null && c2 != null) { + if (c1.equals(c2)) { + d = 1; + } else { + d = 1; + ConceptPairSimilarity csim = conceptSimilarityService + .similarity(metrics, c1, c2, conceptFilter, false); + for (Double simVal : csim.getSimilarities()) { + d *= simVal; + } + } + } + return d; + } + + public void setConceptSimilarityService( + ConceptSimilarityService conceptSimilarityService) { + this.conceptSimilarityService = conceptSimilarityService; + } + + public void setCutoff(double cutoff) { + this.cutoff = cutoff; + } + + public void setLabel(String label) { + this.label = label; + } + + public void setMetricNames(String metricNames) { + this.metricNames = metricNames; + this.metrics = new ArrayList(); + for(String metricName : metricNames.split(",")) { + SimilarityMetricEnum s = SimilarityMetricEnum.valueOf(metricName); + if(s == null) { + throw new RuntimeException("invalid metric name: " + metricName); + } + metrics.add(s); + } + } + + public void setRankCutoff(Integer rankCutoff) { + this.rankCutoff = rankCutoff; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SemanticTypeKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,392 @@ +package 
org.apache.ctakes.ytex.kernel.evaluator; + +import java.util.BitSet; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.kernel.metric.ConceptSimilarityService; + + +/** + * Before comparing semantic distance, use this kernel to filter by semantic + * type. + *

+ * Modes: + *

  • MAINSUI (default): concept's main semantic types must overlap + *
  • TUI: concept's TUIs must overlap. + *

    + * The MAINSUI mode is taken from Sujeevan Aseervatham's semantic kernel. It + * maps all semantic types to a handful of semantic types. + *

    + * The corpusName parameter specifies the concepts for which cuis' semantic + * types will be loaded + * + * @author vijay + * + */ +public class SemanticTypeKernel extends CacheKernel { + private static final Log log = LogFactory.getLog(SemanticTypeKernel.class); + private static final String MAINSUI = "MAINSUI"; + private static final String TUI = "TUI"; + + public static int getMainSem(int sui) { + switch (sui) { + case 52: + case 53: + case 56: + case 51: + case 64: + case 55: + case 66: + case 57: + case 54: + return 0; + case 17: + case 29: + case 23: + case 30: + case 31: + case 22: + case 25: + case 26: + case 18: + case 21: + case 24: + return 1; + case 116: + case 195: + case 123: + case 122: + case 118: + case 103: + case 120: + case 104: + case 200: + case 111: + case 196: + case 126: + case 131: + case 125: + case 129: + case 130: + case 197: + case 119: + case 124: + case 114: + case 109: + case 115: + case 121: + case 192: + case 110: + case 127: + return 2; + case 185: + case 77: + case 169: + case 102: + case 78: + case 170: + case 171: + case 80: + case 81: + case 89: + case 82: + case 79: + return 3; + case 203: + case 74: + case 75: + return 4; + case 20: + case 190: + case 49: + case 19: + case 47: + case 50: + case 33: + case 37: + case 48: + case 191: + case 46: + case 184: + return 5; + case 87: + case 88: + case 28: + case 85: + case 86: + return 6; + case 83: + return 7; + case 100: + case 3: + case 11: + case 8: + case 194: + case 7: + case 12: + case 99: + case 13: + case 4: + case 96: + case 16: + case 9: + case 15: + case 1: + case 101: + case 2: + case 98: + case 97: + case 14: + case 6: + case 10: + case 204: // vng missing sui + case 5: + return 8; + case 71: + case 168: + case 73: + case 72: + case 167: + return 9; + case 91: + case 90: + return 10; + case 93: + case 92: + case 94: + case 95: + return 11; + case 38: + case 69: + case 68: + case 34: + case 70: + case 67: + return 12; + case 43: + case 201: + case 45: + case 41: 
+ case 44: + case 42: + case 32: + case 40: + case 39: + return 13; + case 60: + case 65: + case 58: + case 59: + case 63: + case 62: + case 61: + return 14; + default: + break; + } + return -1; + } + + private ConceptSimilarityService conceptSimilarityService; + private String corpusName; + private Map> cuiMainSuiMap = new HashMap>(); + private Map cuiTuiMap = null; + private List tuiList = null; + private String cuiTuiQuery; + // private DataSource dataSource; + // private SimpleJdbcTemplate simpleJdbcTemplate; + // private JdbcTemplate jdbcTemplate; + + private String mode = "MAINSUI"; + + // private PlatformTransactionManager transactionManager; + + // private void addCuiTuiToMap(Map tuiMap, String cui, + // String tui) { + // // get 'the' tui string + // if (tuiMap.containsKey(tui)) + // tui = tuiMap.get(tui); + // else + // tuiMap.put(tui, tui); + // Set tuis = cuiTuiMap.get(cui); + // if (tuis == null) { + // tuis = new HashSet(); + // cuiTuiMap.put(cui, tuis); + // } + // tuis.add(tui); + // } + + /** + * concepts have overlapping semantic types? 
yes return 1, else return 0 + */ + public double innerEvaluate(Object o1, Object o2) { + if (o1 == null || o2 == null) + return 0; + else if (o1.equals(o2)) + return 1.0; + else if (this.getMode() == null || this.getMode().length() == 0 + || MAINSUI.equals(this.getMode())) + return mainSuiCheck(o1, o2); + else if (TUI.equals(this.getMode())) + return tuiCheck(o1, o2); + else { + log.error("invalid mode"); + throw new RuntimeException("invalid mode"); + } + } + + public ConceptSimilarityService getConceptSimilarityService() { + return conceptSimilarityService; + } + + public String getCorpusName() { + return corpusName; + } + + public String getCuiTuiQuery() { + return cuiTuiQuery; + } + + // + // public DataSource getDataSource() { + // return dataSource; + // } + + public String getMode() { + return mode; + } + + // public PlatformTransactionManager getTransactionManager() { + // return transactionManager; + // } + + public void init() { + // TransactionTemplate t = new + // TransactionTemplate(this.transactionManager); + // t.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW); + // t.execute(new TransactionCallback() { + // @Override + // public Object doInTransaction(TransactionStatus arg0) { + cuiTuiMap = conceptSimilarityService.getCuiTuiMap(); + tuiList = conceptSimilarityService.getTuiList(); + initCuiMainSuiMap(); + // return null; + // } + // }); + } + + /** + * init the cui -> 'main sui' map. 
+ */ + private void initCuiMainSuiMap() { + if (cuiTuiMap != null) { + for (Map.Entry cuiTui : cuiTuiMap.entrySet()) { + cuiMainSuiMap.put(cuiTui.getKey(), + tuiToMainSui(cuiTui.getValue())); + } + } + } + + // /** + // * init cui-tui map from query + // */ + // public void initCuiTuiMapFromQuery() { + // this.jdbcTemplate.query(this.cuiTuiQuery, new RowCallbackHandler() { + // // don't duplicate tui strings to save memory + // Map tuiMap = new HashMap(); + // + // @Override + // public void processRow(ResultSet rs) throws SQLException { + // String cui = rs.getString(1); + // String tui = rs.getString(2); + // addCuiTuiToMap(tuiMap, cui, tui); + // } + // }); + // } + + /** + * + * @param o1 + * cui + * @param o2 + * cui + * @return concepts have overlapping main semantic types, return 1, else + * return 0 + */ + private double mainSuiCheck(Object o1, Object o2) { + Set tuis1 = cuiMainSuiMap.get((String) o1); + Set tuis2 = cuiMainSuiMap.get((String) o2); + // only compare the two if they have a common semantic type + if (tuis1 != null && tuis2 != null + && !Collections.disjoint(tuis1, tuis2)) { + return 1; + } else { + return 0; + } + } + + public void setConceptSimilarityService( + ConceptSimilarityService conceptSimilarityService) { + this.conceptSimilarityService = conceptSimilarityService; + } + + public void setCorpusName(String corpusName) { + this.corpusName = corpusName; + } + + public void setCuiTuiQuery(String cuiTuiQuery) { + this.cuiTuiQuery = cuiTuiQuery; + } + + // public void setDataSource(DataSource dataSource) { + // this.dataSource = dataSource; + // // this.simpleJdbcTemplate = new SimpleJdbcTemplate(dataSource); + // this.jdbcTemplate = new JdbcTemplate(dataSource); + // } + + public void setMode(String mode) { + this.mode = mode; + } + + // public void setTransactionManager( + // PlatformTransactionManager transactionManager) { + // this.transactionManager = transactionManager; + // } + + /** + * + * @param o1 + * cui + * @param o2 + * cui + * 
@return concepts have overlapping tuis, return 1, else return 0 + */ + private double tuiCheck(Object o1, Object o2) { + if(cuiTuiMap == null) + return 0; + BitSet tuis1 = this.cuiTuiMap.get((String) o1); + BitSet tuis2 = this.cuiTuiMap.get((String) o2); + if (tuis1 != null && tuis2 != null && tuis1.intersects(tuis2)) { + return 1; + } else { + return 0; + } + } + + public Set tuiToMainSui(BitSet tuis) { + Set mainSui = new HashSet(tuis.size()); + for (int i = tuis.nextSetBit(0); i >= 0; i = tuis.nextSetBit(i + 1)) { + String tui = this.tuiList.get(i); + mainSui.add(getMainSem(Integer.parseInt(tui.substring(1)))); + } + return mainSui; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SumKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,30 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.util.List; + +/** + * apply all the delegate kernels to the objects, sum them up + */ +public class SumKernel extends CacheKernel { + List delegateKernels; + + public List getDelegateKernels() { + return delegateKernels; + } + + public void setDelegateKernels(List delegateKernels) { + this.delegateKernels = delegateKernels; + } + + /** + * + */ + @Override + public double innerEvaluate(Object o1, Object o2) { + double d = 0; + for(Kernel k : delegateKernels) { + d += k.evaluate(o1, o2); + } + return d; + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java URL: 
http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/SymmetricPairCacheKeyGenerator.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,28 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.lang.reflect.Method; + +import org.apache.ctakes.ytex.kernel.OrderedPair; + + +/** + * cache key for a method that takes 2 arguments, and is symmetric - the order + * of the arguments doesn't matter. + * + * @author vijay + * + */ +public class SymmetricPairCacheKeyGenerator implements CacheKeyGenerator { + + @SuppressWarnings({ "unchecked", "rawtypes" }) + @Override + public Object getCacheKey(Method method, Object[] args) { + return new OrderedPair((Comparable) args[0], (Comparable) args[1]); + } + + @SuppressWarnings({ "rawtypes", "unchecked" }) + public Object getCacheKey(Object o1, Object o2) { + return new OrderedPair((Comparable) o1, (Comparable) o2); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/TreePrinter.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,37 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.io.IOException; 
+import java.util.Map; + +import org.apache.ctakes.ytex.kernel.tree.InstanceTreeBuilder; +import org.apache.ctakes.ytex.kernel.tree.Node; +import org.springframework.context.ApplicationContext; +import org.springframework.context.access.ContextSingletonBeanFactoryLocator; + + +public class TreePrinter { + + public static void main(String args[]) throws IOException, ClassNotFoundException { + String beanRefContext = "classpath*:org/apache/ctakes/ytex/kernelBeanRefContext.xml"; + String contextName = "kernelApplicationContext"; + ApplicationContext appCtx = (ApplicationContext) ContextSingletonBeanFactoryLocator + .getInstance(beanRefContext) + .useBeanFactory(contextName).getFactory(); + ApplicationContext appCtxSource = appCtx; + InstanceTreeBuilder builder = appCtxSource.getBean( + "instanceTreeBuilder", InstanceTreeBuilder.class); + Map instanceMap = builder.loadInstanceTrees(args[0]); + for(Node node : instanceMap.values()) + printTree(node, 0); + } + + private static void printTree(Node node, int depth) { + for(int i = 0; i<= depth; i++) { + System.out.print(" "); + } + System.out.println(node); + for(Node child : node.getChildren()) { + printTree(child, depth+1); + } + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/WeightedPolynomialMixingKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,135 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.util.Map; + +import 
org.apache.ctakes.ytex.kernel.tree.Node; +import org.springframework.beans.factory.InitializingBean; + + +/** + * weighted polynomial mixing kernel: + * (\sum w_i * k(x_i, y_i) \div \sum w_i ) ^ l + * + *
      + *
    • Apply the delegate kernel to the respective 'parts' of this node (x_i, + * y_i) + *
    • Multiply the result by the weight (w_i * k(x_i,y_i)) + *
    • Sum everything up, divide by the sum of the weights + *
    • Raise the result to the power l + *
    + * + * + * {@link #pow} the power to raise things to + *

    + * {@link #attributeKey} the children of this node form a 'vector'. This is the + * attribute that we use to 'index' this vector + *

    + * {@link #mapIndexWeight} the indices (i.e. the value of + * child.getValue().get(attributeKey)) and the corresponding + * weights. The indices must be integers. The + * class must be identical to the class of the child - take care that the + * numeric types match. + *

    + * {@link #delegateKernel} the kernel to apply to pairs of children. + * + * @author vijay + * + */ +public class WeightedPolynomialMixingKernel implements Kernel, InitializingBean { + + private int pow = 1; + private String attributeKey; + private Map mapIndexWeight; + private Kernel delegateKernel; + + public int getPow() { + return pow; + } + + public void setPow(int pow) { + this.pow = pow; + } + + public String getAttributeKey() { + return attributeKey; + } + + public void setAttributeKey(String attributeKey) { + this.attributeKey = attributeKey; + } + + public Map getMapIndexWeight() { + return mapIndexWeight; + } + + public void setMapIndexWeight(Map mapIndexWeight) { + this.mapIndexWeight = mapIndexWeight; + } + + public Kernel getDelegateKernel() { + return delegateKernel; + } + + public void setDelegateKernel(Kernel delegateKernel) { + this.delegateKernel = delegateKernel; + } + + private double scalingFactor; + + @Override + public double evaluate(Object o1, Object o2) { + double retVal = 0; + // both objects must be nodes + if ((o1 instanceof Node) && (o2 instanceof Node)) { + double keval = 0; + // iterate through the 'indices' and the weights + for (Map.Entry indexWeight : mapIndexWeight + .entrySet()) { + // get the pair of matching nodes + Node n1 = getNodeForIndex(indexWeight.getKey(), (Node) o1); + Node n2 = getNodeForIndex(indexWeight.getKey(), (Node) o2); + if (n1 != null && n2 != null) { + // evaluate the kernel, multiply by weight, add to running + // sum + keval += (delegateKernel.evaluate(n1, n2) * indexWeight + .getValue()); + } + } + if (keval != 0) { + // raise to the power, divide by the scaling factor + retVal = Math.pow(keval, pow) / scalingFactor; + } + } + return retVal; + + } + + /** + * @param index + * the attribute has to match this + * @param o1 + * the node whose children we're going to search + * @return node if found, else null + */ + private Node getNodeForIndex(int index, Node o1) { + for (Node n : o1.getChildren()) { 
+ Integer attribute = (Integer)n.getValue().get(attributeKey); + if (attribute != null && index == attribute.intValue()) + return n; + } + return null; + } + + /** + * precompute the scaling factor - we will always divide by this + */ + @Override + public void afterPropertiesSet() throws Exception { + double totalWeight = 0d; + for (double weight : this.mapIndexWeight.values()) { + totalWeight += weight; + } + this.scalingFactor = Math.pow(totalWeight, pow); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BaseSimilarityMetric.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,92 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + + +public abstract class BaseSimilarityMetric implements SimilarityMetric { + + protected ConceptSimilarityService simSvc; + + public ConceptSimilarityService getConceptSimilarityService() { + return simSvc; + } + + public void setConceptSimilarityService( + ConceptSimilarityService conceptSimilarityService) { + this.simSvc = conceptSimilarityService; + } + + /** + * compute the lcses and min path distance for the concept pair, if this + * hasn't been done already + * + * @param concept1 + * @param concept2 + * @param simInfo + */ + protected void initLCSes(String concept1, String concept2, + SimilarityInfo simInfo) { + if (simInfo.getLcsDist() == null) { + simInfo.setLcsDist(simSvc.getLCS(concept1, concept2, + simInfo.getLcses(), 
simInfo.getLcsPaths())); + } + } + + /** + * get the best lcs and its information content if this hasn't been done + * already. + * + * @param conceptFilter + * @param simInfo + * @param intrinsicIC + * set to false for corpus based ic + * @return + */ + protected double initLcsIC(Map conceptFilter, + SimilarityInfo simInfo, boolean intrinsicIC) { + Double lcsIC = intrinsicIC ? simInfo.getIntrinsicLcsIC() : simInfo + .getCorpusLcsIC(); + if (lcsIC == null) { + String lcs = null; + lcsIC = 0d; + Object[] bestLCSArr = simSvc.getBestLCS(simInfo.getLcses(), + intrinsicIC, conceptFilter); + if (bestLCSArr != null) { + lcs = (String) bestLCSArr[0]; + lcsIC = (Double) bestLCSArr[1]; + if (intrinsicIC) { + simInfo.setIntrinsicLcs(lcs); + simInfo.setIntrinsicLcsIC(lcsIC); + } else { + simInfo.setCorpusLcs(lcs); + simInfo.setCorpusLcsIC(lcsIC); + } + } + } + return lcsIC; + } + + /** + * call initLCSes and initLcsIC + * + * @param concept1 + * @param concept2 + * @param conceptFilter + * @param simInfo + * @param intrinsicIC + * @return + */ + protected double initLcsIC(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo, + boolean intrinsicIC) { + this.initLCSes(concept1, concept2, simInfo); + return this.initLcsIC(conceptFilter, simInfo, intrinsicIC); + } + + public BaseSimilarityMetric(ConceptSimilarityService simSvc) { + this.simSvc = simSvc; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptInfo.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,71 @@ 
+package org.apache.ctakes.ytex.kernel.metric; + +/** + * we run into out of memory errors when preloading the intrinsic ic for large + * concept graphs. 'compress' the depth a tiny bit by using short instead of + * int. + *

    + * Tried using float instead of double, but didn't get into the under 1gb range + * for very large concept graphs, so just use double to avoid precision errors. + * + * @author vijay + * + */ +public class ConceptInfo { + private String conceptId; + private short depth; + // private float corpusIC; + // private float intrinsicIC; + private double corpusIC; + private double intrinsicIC; + + public ConceptInfo() { + super(); + } + + public ConceptInfo(String conceptId, int depth, double corpusIC, + double intrinsicIC) { + super(); + this.conceptId = conceptId; + this.depth = (short) depth; + // this.corpusIC = (float) corpusIC; + // this.intrinsicIC = (float) intrinsicIC; + this.corpusIC = corpusIC; + this.intrinsicIC = intrinsicIC; + } + + public String getConceptId() { + return conceptId; + } + + public void setConceptId(String conceptId) { + this.conceptId = conceptId; + } + + public int getDepth() { + return (int) depth; + } + + public void setDepth(int depth) { + this.depth = (short) depth; + } + + public double getCorpusIC() { + return (double) corpusIC; + } + + public void setCorpusIC(double corpusIC) { + // this.corpusIC = (float) corpusIC; + this.corpusIC = (double) corpusIC; + } + + public double getIntrinsicIC() { + return (double) intrinsicIC; + } + + public void setIntrinsicIC(double intrinsicIC) { + // this.intrinsicIC = (float) intrinsicIC; + this.intrinsicIC = (double) intrinsicIC; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPair.java Mon 
Dec 16 16:30:30 2013 @@ -0,0 +1,99 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.io.Serializable; + +import javax.xml.bind.annotation.XmlAttribute; + +/** + * pair of concepts. used to submit a set of concepts to the similarity service + * to compute pairwise similarity. + * + * @author vijay + * + */ +public class ConceptPair implements Serializable, Comparable { + /** + * + */ + private static final long serialVersionUID = 1L; + private String concept1; + private String concept2; + + @XmlAttribute public String getConcept1() { + return concept1; + } + + public void setConcept1(String concept1) { + this.concept1 = concept1; + } + + @XmlAttribute public String getConcept2() { + return concept2; + } + + public void setConcept2(String concept2) { + this.concept2 = concept2; + } + + public ConceptPair(String concept1, String concept2) { + super(); + this.concept1 = concept1; + this.concept2 = concept2; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((concept1 == null) ? 0 : concept1.hashCode()); + result = prime * result + + ((concept2 == null) ? 
0 : concept2.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + ConceptPair other = (ConceptPair) obj; + if (concept1 == null) { + if (other.concept1 != null) + return false; + } else if (!concept1.equals(other.concept1)) + return false; + if (concept2 == null) { + if (other.concept2 != null) + return false; + } else if (!concept2.equals(other.concept2)) + return false; + return true; + } + + @Override + public String toString() { + return "ConceptPair [concept1=" + concept1 + ", concept2=" + concept2 + + "]"; + } + + public ConceptPair() { + super(); + } + + + /** + * compare concept 1, then concept 2 + */ + @Override + public int compareTo(ConceptPair other) { + int c1 = getConcept1().compareTo(other.getConcept1()); + if(c1 != 0) + return c1; + return getConcept2().compareTo(other.getConcept2()); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptPairSimilarity.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,51 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.io.Serializable; +import java.util.List; + +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; + +@XmlRootElement(name = "conceptPairSimilarity") +public class ConceptPairSimilarity implements Serializable { + /** + * + */ + 
private static final long serialVersionUID = 1L; + private ConceptPair conceptPair; + + private List similarities; + private SimilarityInfo similarityInfo; + + public ConceptPairSimilarity() { + super(); + } + + @XmlElement + public ConceptPair getConceptPair() { + return conceptPair; + } + + @XmlAttribute + public List getSimilarities() { + return similarities; + } + + @XmlElement + public SimilarityInfo getSimilarityInfo() { + return similarityInfo; + } + + public void setConceptPair(ConceptPair conceptPair) { + this.conceptPair = conceptPair; + } + + public void setSimilarities(List similarities) { + this.similarities = similarities; + } + + public void setSimilarityInfo(SimilarityInfo similarityInfo) { + this.similarityInfo = similarityInfo; + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,186 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.util.BitSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.ctakes.ytex.kernel.model.ConceptGraph; + + +public interface ConceptSimilarityService { + + public enum SimilarityMetricEnum { + LCH(false, false), INTRINSIC_LCH(true, false), LIN(false, true), INTRINSIC_LIN( + true, false), PATH(false, false), INTRINSIC_PATH(true, false), JACCARD( + true, false), SOKAL(true, false), RADA(false, false), INTRINSIC_RADA( + true, false), WUPALMER(false, false), PAGERANK(false, 
false); + boolean intrinsicIC = false; + boolean corpusIC = false; + + /** + * is this measure taxonomy based? + * + * @return + */ + public boolean isTaxonomy() { + return !intrinsicIC && !corpusIC; + } + + /** + * is this measure based on intrinsic IC? + * + * @return + */ + public boolean isIntrinsicIC() { + return intrinsicIC; + } + + /** + * is this measure based on corpus IC? + * + * @return + */ + public boolean isCorpusIC() { + return corpusIC; + } + + SimilarityMetricEnum(boolean intrinsicIC, boolean corpusIC) { + this.intrinsicIC = intrinsicIC; + this.corpusIC = corpusIC; + } + } + + public String getConceptGraphName(); + + // public abstract double lch(String concept1, String concept2); + + // public abstract double lin(String concept1, String concept2); + + public int lcs(String concept1, String concept2, List lcsPath); + + public abstract ConceptGraph getConceptGraph(); + + /** + * cui - tui map. tuis are bitsets, indices correspond to tuis in + * {@link #getTuiList()} + * + * @return + */ + public abstract Map getCuiTuiMap(); + + // /** + // * supervised lin measure. + // * + // * @param concept1 + // * @param concept2 + // * @param conceptFilter + // * map of concept id to imputed infogain. if the concept isn't in + // * this map, the concepts won't be compared. null for + // * unsupervised lin. + // * @return + // */ + // public abstract double filteredLin(String concept1, String concept2, + // Map conceptFilter); + + /** + * list of tuis that corresponds to bitset indices + * + * @return + */ + public abstract List getTuiList(); + + /** + * For the given label and cutoff, get the corresponding concepts whose + * propagated ig meets the threshold. 
Used by lin kernel to find concepts + * that actually have a non-trivial similarity + * + * @param label + * label + * @param rankCutoff + * cutoff + * @param conceptFilter + * set to fill with concepts + * @return double minimum evaluation + */ + public abstract double loadConceptFilter(String label, int rankCutoff, + Map conceptFilter); + + /** + * get the lcs(s) for the specified concepts + * + * @param concept1 + * required + * @param concept2 + * required + * @param lcses + * required - will be filled with the lcs(s). + * @param lcsPathMap + * optional - will be filled with lcs and paths through the + * lcses. + * @return distance of path through lcs + */ + public int getLCS(String concept1, String concept2, Set lcses, + List lcsPaths); + + /** + * get the best lcs + * + * @param lcses + * set of lcses + * @param intrinsicIC + * should the intrinsic ic be used? false - use corpus-based ic. + * For multiple lcses not using concept filter, use the lcs with + * the lowest infocontent + * @param conceptFilter + * limit to lcses in the concept filter. The lcs with the highest + * value will be used. + * @return array with 2 entries. Entry 1 - lcs (String). Entry 2 - + * infocontent (double). Null if no lcses are in the concept filter. + */ + public Object[] getBestLCS(Set lcses, boolean intrinsicIC, + Map conceptFilter); + + public abstract double getIC(String concept, boolean intrinsicICMap); + + /** + * compute similarity for a pair of concepts + * + * @param metrics + * required, similarity metrics to compute + * @param concept1 + * required + * @param concept2 + * required + * @param conceptFilter + * optional - only lcs's in this set will be used. + * @param simInfo + * optional - pass this to get information on lcs. 
Instantiate + * the lcsPathMap to get paths through lcs + * @return similarities + */ + public abstract ConceptPairSimilarity similarity( + List metrics, String concept1, + String concept2, Map conceptFilter, boolean lcs); + + /** + * compute similarity for a list of concept pairs + * + * @param conceptPairs + * required, concept pairs for which similarity should be + * computed + * @param metrics + * required, similarity metrics to compute + * @param conceptFilter + * optional - only lcs's in this set will be used. + * @param simInfos + * optional - if provided, this list will be filled with the + * similarity info for each concept pair. + * @return similarities + */ + public List similarity( + List conceptPairs, List metrics, + Map conceptFilter, boolean lcs); + + public abstract int getDepth(String concept); +} \ No newline at end of file Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,1079 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.PrintStream; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import 
java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import net.sf.ehcache.Cache; +import net.sf.ehcache.CacheManager; +import net.sf.ehcache.Element; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.kernel.ImputedFeatureEvaluator; +import org.apache.ctakes.ytex.kernel.InfoContentEvaluator; +import org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluator; +import org.apache.ctakes.ytex.kernel.OrderedPair; +import org.apache.ctakes.ytex.kernel.SimSvcContextHolder; +import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao; +import org.apache.ctakes.ytex.kernel.dao.ConceptDao; +import org.apache.ctakes.ytex.kernel.model.ConcRel; +import org.apache.ctakes.ytex.kernel.model.ConceptGraph; +import org.apache.ctakes.ytex.kernel.model.FeatureRank; +import org.apache.ctakes.ytex.kernel.pagerank.PageRankService; +import org.springframework.transaction.PlatformTransactionManager; +import org.springframework.transaction.TransactionStatus; +import org.springframework.transaction.support.TransactionCallback; +import org.springframework.transaction.support.TransactionTemplate; + +import com.google.common.collect.ImmutableMap; + + +/** + * compute concept similarity + * + * @author vijay + * + */ +public class ConceptSimilarityServiceImpl implements ConceptSimilarityService { + private static final Log log = LogFactory + .getLog(ConceptSimilarityServiceImpl.class); + + private static String formatPaths(List lcsPaths) { + StringBuilder b = new StringBuilder(); + Iterator lcsPathIter = 
lcsPaths.iterator(); + while (lcsPathIter.hasNext()) { + LCSPath lcsPath = lcsPathIter.next(); + String lcs = lcsPath.getLcs(); + b.append(lcs); + b.append("="); + b.append(lcsPath.toString()); + if (lcsPathIter.hasNext()) + b.append("|"); + } + return b.toString(); + } + + @SuppressWarnings("static-access") + public static void main(String args[]) throws IOException { + Options options = new Options(); + options.addOption(OptionBuilder + .withArgName("concepts") + .hasArg() + .withDescription( + "concept pairs or a file containing concept pairs. To specify pairs on command line, separate concepts by comma, concept pairs by semicolon. For file, separate concepts by comma or tab, each concept pair on a new line.") + .isRequired(true).create("concepts")); + options.addOption(OptionBuilder + .withArgName("metrics") + .hasArg() + .withDescription( + "comma-separated list of metrics. Valid metrics: " + + Arrays.asList(SimilarityMetricEnum.values())) + .isRequired(true).create("metrics")); + options.addOption(OptionBuilder + .withArgName("out") + .hasArg() + .withDescription( + "file to write oputput to. 
if not specified, output sent to stdout.") + .create("out")); + options.addOption(OptionBuilder.withArgName("lcs") + .withDescription("output lcs and path for each concept pair") + .create("lcs")); + try { + CommandLineParser parser = new GnuParser(); + CommandLine line = parser.parse(options, args); + String concepts = line.getOptionValue("concepts"); + String metrics = line.getOptionValue("metrics"); + String out = line.getOptionValue("out"); + boolean lcs = line.hasOption("lcs"); + PrintStream os = null; + try { + if (out != null) { + os = new PrintStream(new BufferedOutputStream( + new FileOutputStream(out))); + } else { + os = System.out; + } + List conceptPairs = parseConcepts(concepts); + List metricList = parseMetrics(metrics); + ConceptSimilarityService simSvc = SimSvcContextHolder + .getApplicationContext().getBean( + ConceptSimilarityService.class); + List simInfos = lcs ? new ArrayList( + conceptPairs.size()) : null; + List conceptSimMap = simSvc.similarity( + conceptPairs, metricList, null, lcs); + printSimilarities(conceptPairs, conceptSimMap, metricList, + simInfos, lcs, os); + // try { + // Thread.sleep(60*1000); + // } catch (InterruptedException e) { + // e.printStackTrace(); + // } + } finally { + if (out != null) { + try { + os.close(); + } catch (Exception e) { + } + } + } + } catch (ParseException pe) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp( + "java " + ConceptSimilarityServiceImpl.class.getName() + + " get concept similiarity", options); + } + } + + private static List parseConcepts(String concepts) + throws IOException { + BufferedReader r = null; + try { + List conceptPairs = new ArrayList(); + File f = new File(concepts); + if (f.exists()) { + r = new BufferedReader(new FileReader(f)); + } else { + r = new BufferedReader(new StringReader(concepts)); + } + String line = null; + while ((line = r.readLine()) != null) { + // for command line, split pairs by semicolon + String lines[] = line.split(";"); + for 
(String subline : lines) { + String pair[] = subline.split(",|\\t"); + if (pair.length != 2) { + System.err.println("cannot parse concept pair: " + + subline); + } else { + conceptPairs.add(new ConceptPair(pair[0], pair[1])); + } + } + } + return conceptPairs; + } finally { + if (r != null) + r.close(); + } + } + + private static List parseMetrics(String metrics) { + String ms[] = metrics.split(","); + List metricSet = new ArrayList(); + for (String metric : ms) { + SimilarityMetricEnum m = SimilarityMetricEnum.valueOf(metric); + if (m == null) + System.err.println("invalid metric: " + ms); + else + metricSet.add(m); + } + return metricSet; + } + + private static void printSimilarities(List conceptPairs, + List conceptSimList, + List metricList, + List simInfos, boolean lcs, PrintStream os) { + // print header + os.print("Concept 1\tConcept 2"); + for (SimilarityMetricEnum metric : metricList) { + os.print("\t"); + os.print(metric); + } + if (lcs) { + os.print("\tlcs(s)\tcorpus lcs\tintrinsic lcs\tpaths"); + } + os.println(); + // print content + for (ConceptPairSimilarity csim : conceptSimList) { + ConceptPair p = csim.getConceptPair(); + os.print(p.getConcept1()); + os.print("\t"); + os.print(p.getConcept2()); + for (Double sim : csim.getSimilarities()) { + os.print("\t"); + if (sim != null) + os.print(String.format("%6f", sim)); + else + os.print(0d); + } + if (lcs) { + SimilarityInfo simInfo = csim.getSimilarityInfo(); + os.print("\t"); + Iterator lcsIter = simInfo.getLcses().iterator(); + while (lcsIter.hasNext()) { + os.print(lcsIter.next()); + if (lcsIter.hasNext()) + os.print('|'); + } + os.print("\t"); + os.print(simInfo.getCorpusLcs() == null ? "" : simInfo + .getCorpusLcs()); + os.print("\t"); + os.print(simInfo.getIntrinsicLcs() == null ? 
"" : simInfo + .getIntrinsicLcs()); + os.print("\t"); + os.print(formatPaths(simInfo.getLcsPaths())); + } + os.println(); + } + } + + private CacheManager cacheManager; + + private ConceptGraph cg = null; + + private ClassifierEvaluationDao classifierEvaluationDao; + + private ConceptDao conceptDao; + private String conceptGraphName; + + private String conceptSetName; + + // /** + // * information concept cache + // */ + // private Map corpusICMap = null; + + private String corpusName; + + private Map cuiTuiMap; + + // private Map conceptInfoMap = null; + // private ConceptInfo[] conceptInfoCache; + + /** + * cache to hold lcs's + */ + private Cache lcsCache; + private String lcsImputedType = ImputedFeatureEvaluator.MeasureType.INFOGAIN + .getName(); + private PageRankService pageRankService; + + private boolean preload = true; + private Map corpusICMap; + + private Map similarityMetricMap = null; + + private PlatformTransactionManager transactionManager; + + private List tuiList; + + private void addCuiTuiToMap(Map> cuiTuiMap, + Map tuiMap, String cui, String tui) { + // get 'the' tui string + if (tuiMap.containsKey(tui)) + tui = tuiMap.get(tui); + else + tuiMap.put(tui, tui); + Set tuis = cuiTuiMap.get(cui); + if (tuis == null) { + tuis = new HashSet(); + cuiTuiMap.put(cui, tuis); + } + tuis.add(tui); + } + + // /** + // * return lin measure. optionally filter lin measure so that only concepts + // * that have an lcs that is relevant to the classification task have a + // * non-zero lin measure. + // * + // * relevant concepts are those whose evaluation wrt the label exceeds a + // * threshold. + // * + // * @param concept1 + // * @param concept2 + // * @param label + // * if not null, then filter lcses. + // * @param lcsMinEvaluation + // * if gt; 0, then filter lcses. this is the threshold. + // * @return 0 - no lcs, or no lcs that meets the threshold. 
+ // */ + // @Override + // public double filteredLin(String concept1, String concept2, + // Map conceptFilter) { + // double ic1 = getIC(concept1); + // double ic2 = getIC(concept2); + // // lin not defined if one of the concepts doesn't exist in the corpus + // if (ic1 == 0 || ic2 == 0) + // return 0; + // double denom = getIC(concept1) + getIC(concept2); + // if (denom != 0) { + // ConcRel cr1 = cg.getConceptMap().get(concept1); + // ConcRel cr2 = cg.getConceptMap().get(concept2); + // if (cr1 != null && cr2 != null) { + // Set lcses = new HashSet(); + // int dist = getLCSFromCache(cr1, cr2, lcses); + // if (dist > 0) { + // double ic = getBestIC(lcses, conceptFilter); + // return 2 * ic / denom; + // } + // } + // } + // return 0; + // } + + // /** + // * get the information content for the concept with the highest evaluation + // * greater than a specified threshold. + // * + // * If threshold 0, get the lowest IC of all the lcs's. + // * + // * @param lcses + // * the least common subsumers of a pair of concepts + // * @param label + // * label against which feature was evaluated + // * @param lcsMinEvaluation + // * threshold that the feature has to exceed. 0 for no filtering. + // * @return 0 if no lcs that makes the cut. else find the lcs(es) with the + // * maximal evaluation, and return getIC on these lcses. 
+ // * + // * @see #getIC(Iterable) + // */ + // private double getBestIC(Set lcses, + // Map conceptFilter) { + // if (conceptFilter != null) { + // double currentBest = -1; + // Set bestLcses = new HashSet(); + // for (String lcs : lcses) { + // if (conceptFilter.containsKey(lcs)) { + // double lcsEval = conceptFilter.get(lcs); + // if (currentBest == -1 || lcsEval > currentBest) { + // bestLcses.clear(); + // bestLcses.add(lcs); + // currentBest = lcsEval; + // } else if (currentBest == lcsEval) { + // bestLcses.add(lcs); + // } + // } + // } + // if (bestLcses.size() > 0) { + // return this.getIC(bestLcses); + // } + // } else { + // // unfiltered - get the lowest ic + // return this.getIC(lcses); + // } + // return 0; + // } + + // private ConceptInfo getPreloadedConceptInfo(String conceptId) { + // ConcRel cr = cg.getConceptMap().get(conceptId); + // if (cr != null) { + // return this.conceptInfoCache[cr.getNodeIndex()]; + // } + // return null; + // } + + @Override + public Object[] getBestLCS(Set lcses, boolean intrinsicIC, + Map conceptFilter) { + Map lcsICMap = new HashMap(lcses.size()); + // if (isPreload()) { + // look in conceptInfoMap for info content + for (String lcs : lcses) { + lcsICMap.put(lcs, getIC(lcs, intrinsicIC)); + // } + // } else { + // // load info content on demand + // Map frMap = getICOnDemand(lcses, + // intrinsicIC); + // for (Map.Entry frMapEntry : + // frMap.entrySet()) { + // lcsICMap.put(frMapEntry.getKey(), frMapEntry.getValue() + // .getEvaluation()); + // } + } + if (conceptFilter != null) { + double currentBest = -1; + Set bestLcses = new HashSet(); + for (String lcs : lcses) { + if (conceptFilter.containsKey(lcs)) { + double lcsEval = conceptFilter.get(lcs); + if (currentBest == -1 || lcsEval > currentBest) { + bestLcses.clear(); + bestLcses.add(lcs); + currentBest = lcsEval; + } else if (currentBest == lcsEval) { + bestLcses.add(lcs); + } + } + } + if (currentBest < 0) + currentBest = 0d; + if (bestLcses.size() > 0) { + 
return this.getBestLCS(bestLcses, lcsICMap); + } else { + // no lcses made the cut + return null; + } + } else { + // unfiltered - get the lowest ic + return this.getBestLCS(lcses, lcsICMap); + } + } + + private Map getICOnDemand(Set lcses, + boolean intrinsicIC) { + if (lcses == null || lcses.isEmpty()) + return new HashMap(0); + Map lcsICMap; + lcsICMap = this.classifierEvaluationDao + .getFeatureRanks( + lcses, + intrinsicIC ? null : this.corpusName, + intrinsicIC ? null : this.conceptSetName, + null, + intrinsicIC ? IntrinsicInfoContentEvaluator.INTRINSIC_INFOCONTENT + : InfoContentEvaluator.INFOCONTENT, null, 0d, + this.getConceptGraphName()); + return lcsICMap; + } + + public Object[] getBestLCS(Set lcses, Map icMap) { + double ic = -1; + String bestLCS = null; + for (String lcs : lcses) { + Double ictmp = icMap.get(lcs); + if (ictmp != null && ic < ictmp.doubleValue()) { + ic = ictmp; + bestLCS = lcs; + } + } + if (ic < 0) + ic = 0d; + return new Object[] { bestLCS, ic }; + } + + // private String createKey(String c1, String c2) { + // if (c1.compareTo(c2) < 0) { + // return new StringBuilder(c1).append("-").append(c2).toString(); + // } else { + // return new StringBuilder(c2).append("-").append(c1).toString(); + // } + // } + + public CacheManager getCacheManager() { + return cacheManager; + } + + public ClassifierEvaluationDao getClassifierEvaluationDao() { + return classifierEvaluationDao; + } + + public ConceptDao getConceptDao() { + return conceptDao; + } + + @Override + public ConceptGraph getConceptGraph() { + return cg; + } + + public String getConceptGraphName() { + return conceptGraphName; + } + + public String getConceptSetName() { + return conceptSetName; + } + + public String getCorpusName() { + return corpusName; + } + + @Override + public Map getCuiTuiMap() { + return cuiTuiMap; + } + + // /** + // * get the concept with the lowest Information Content of all the LCSs. + // * Functionality copied from umls interface. 
+ // * + // * @todo make this configurable/add a parameter - avg/min/max/median? + // * @param lcses + // * @return + // */ + // public double getIC(Iterable lcses) { + // double ic = 0; + // for (String lcs : lcses) { + // double ictmp = getIC(lcs); + // if (ic < ictmp) + // ic = ictmp; + // } + // return ic; + // } + // + // public double getIC(String concept1) { + // Double dRetVal = corpusICMap.get(concept1); + // if (dRetVal != null) + // return (double) dRetVal; + // else + // return 0; + // } + + @Override + public double getIC(String concept, boolean intrinsicICMap) { + double ic = 0d; + if (intrinsicICMap) { + ConcRel cr = this.cg.getConceptMap().get(concept); + if (cr != null) + ic = cr.getIntrinsicInfoContent(); + } else { + Double icC = null; + if (isPreload()) { + // we preloaded all ic - just look in the cache + icC = this.corpusICMap.get(concept); + } else { + // we need to load the ic from the database on demand + Map frMap = getICOnDemand( + new HashSet(Arrays.asList(concept)), false); + if (frMap.containsKey(concept)) + return frMap.get(concept).getEvaluation(); + } + if (icC != null) + ic = icC; + } + return ic; + // if (isPreload()) { + // ConceptInfo ci = this.getPreloadedConceptInfo(concept); + // if (ci != null) + // return intrinsicICMap ? 
ci.getIntrinsicIC() : ci.getCorpusIC(); + // } else { + // Map frMap = getICOnDemand(new HashSet( + // Arrays.asList(concept)), intrinsicICMap); + // if (frMap.containsKey(concept)) + // return frMap.get(concept).getEvaluation(); + // } + // return 0d; + } + + @Override + public int getDepth(String concept) { + // if (isPreload()) { + // // preloaded all concept info - depth should be there + // ConceptInfo ci = this.getPreloadedConceptInfo(concept); + // if (ci != null) + // return (int) ci.getDepth(); + // } else { + // // get the feature ranks for the intrinsic infocontent - + // // rank = depth + // Map frMap = getICOnDemand(new HashSet( + // Arrays.asList(concept)), true); + // if (frMap.containsKey(concept)) + // return frMap.get(concept).getRank(); + // } + ConcRel cr = this.cg.getConceptMap().get(concept); + if (cr != null) + return cr.getDepth(); + return 0; + } + + public int getLCS(String concept1, String concept2, Set lcses, + List lcsPaths) { + int lcsDist = 0; + ConcRel cr1 = getConceptGraph().getConceptMap().get(concept1); + ConcRel cr2 = getConceptGraph().getConceptMap().get(concept2); + if (cr1 != null && cr2 != null) { + lcses.clear(); + if (lcsPaths == null) { + // no need to get paths which we don't cache - look in the cache + lcsDist = getLCSFromCache(cr1, cr2, lcses); + } else { + lcsPaths.clear(); + // need to get paths - compute the lcses and their paths + lcsDist = lcs(concept1, concept2, lcsPaths); + for (LCSPath lcsPath : lcsPaths) { + lcses.add(lcsPath.getLcs()); + } + } + } else { + if (log.isDebugEnabled()) { + if (cr1 == null) + log.debug("could not find concept:" + concept1); + if (cr2 == null) + log.debug("could not find concept:" + concept2); + } + } + return lcsDist; + } + + @SuppressWarnings("unchecked") + private int getLCSFromCache(ConcRel cr1, ConcRel cr2, Set lcses) { + OrderedPair cacheKey = new OrderedPair( + cr1.getConceptID(), cr2.getConceptID()); + Element e = this.lcsCache.get(cacheKey); + if (e != null) { + // hit the 
cache - unpack the lcs + if (e.getObjectValue() != null) { + Object[] val = (Object[]) e.getObjectValue(); + lcses.addAll((Set) val[1]); + return (Integer) val[0]; + } else { + return -1; + } + } else { + // missed the cache - save the lcs + Object[] val = null; + Set lcsCRSet = new HashSet(2); + int dist = ConcRel.getLeastCommonConcept(cr1, cr2, lcsCRSet, null); + if (dist >= 0) { + val = new Object[2]; + val[0] = dist; + for (ConcRel cr : lcsCRSet) { + lcses.add(cr.getConceptID()); + } + val[1] = lcses; + } + e = new Element(cacheKey, val); + this.lcsCache.put(e); + return dist; + } + } + + public String getLcsImputedType() { + return lcsImputedType; + } + + public PageRankService getPageRankService() { + return pageRankService; + } + + public Map getSimilarityMetricMap() { + return similarityMetricMap; + } + + public PlatformTransactionManager getTransactionManager() { + return transactionManager; + } + + @Override + public List getTuiList() { + return this.tuiList; + } + + public void init() { + log.info("begin initialization for concept graph: " + conceptGraphName); + TransactionTemplate t = new TransactionTemplate(this.transactionManager); + t.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW); + t.execute(new TransactionCallback() { + @Override + public Object doInTransaction(TransactionStatus arg0) { + cg = conceptDao.getConceptGraph(conceptGraphName); + if (cg == null) { + log.warn("concept graph null, name: " + conceptGraphName); + return null; + } + if (isPreload()) { + initInfoContent(); + initCuiTuiMapFromCorpus(); + } + initSimilarityMetricMap(); + return null; + } + }); + this.lcsCache = getCacheManager().getCache("lcsCache"); + log.info("end initialization for concept graph: " + conceptGraphName); + } + + /** + * load cui-tui for the specified corpus from the MRSTY table + */ + public void initCuiTuiMapFromCorpus() { + // don't duplicate tui strings to save memory + SortedMap tuiMap = new TreeMap(); + Map> tmpTuiCuiMap = new 
HashMap>(); + List listCuiTui = this.classifierEvaluationDao + .getCorpusCuiTuis(this.getCorpusName(), + this.getConceptGraphName(), this.getConceptSetName()); + for (Object[] cuiTui : listCuiTui) { + String cui = (String) cuiTui[0]; + String tui = (String) cuiTui[1]; + addCuiTuiToMap(tmpTuiCuiMap, tuiMap, cui, tui); + } + // map of tui - bitset index + SortedMap mapTuiIndex = new TreeMap(); + // list of tuis corresponding to bitset indices + List tmpTuiList = new ArrayList(tuiMap.size()); + int index = 0; + for (String tui : tuiMap.keySet()) { + mapTuiIndex.put(tui, index++); + tmpTuiList.add(tui); + } + this.tuiList = Collections.unmodifiableList(tmpTuiList); + // convert list of cuis into bitsets + // Map tmpCuiTuiBitsetMap = new HashMap(); + ImmutableMap.Builder cuiTuiBitsetMapBuilder = new ImmutableMap.Builder(); + for (Map.Entry> cuiTuiMapEntry : tmpTuiCuiMap + .entrySet()) { + // tmpCuiTuiBitsetMap.put(cuiTuiMapEntry.getKey(), + // tuiListToBitset(cuiTuiMapEntry.getValue(), mapTuiIndex)); + cuiTuiBitsetMapBuilder.put(cuiTuiMapEntry.getKey(), + tuiListToBitset(cuiTuiMapEntry.getValue(), mapTuiIndex)); + } + // this.cuiTuiMap = Collections.unmodifiableMap(tmpCuiTuiBitsetMap); + this.cuiTuiMap = cuiTuiBitsetMapBuilder.build(); + } + + /** + * initialize information content caches TODO replace strings with concept + * ids from conceptGraph to save memory + */ + private void initInfoContent() { + // log.info("loading intrinsic infocontent for concept graph: " + // + conceptGraphName); + // List listConceptInfo = classifierEvaluationDao + // .getIntrinsicInfoContent(conceptGraphName); + // if (listConceptInfo.isEmpty()) { + // log.warn("intrinsic info content not available! 
most similarity measures will not work"); + // } + // this.conceptInfoCache = new ConceptInfo[cg.getConceptMap().size()]; + // for (ConceptInfo ci : listConceptInfo) { + // ConcRel cr = cg.getConceptMap().get(ci.getConceptId()); + // if (cr != null) { + // // save a little memory by reusing the string + // ci.setConceptId(cr.getConceptID()); + // conceptInfoCache[cr.getNodeIndex()] = ci; + // } + // } + // fill intrinsicIC + // Map intrinsicICMap = classifierEvaluationDao + // .getIntrinsicInfoContent(conceptGraphName); + // for (Map.Entry icMapEntry : intrinsicICMap + // .entrySet()) { + // FeatureRank r = icMapEntry.getValue(); + // ConcRel cr = cg.getConceptMap().get(r.getFeatureName()); + // if (cr != null) { + // ConceptInfo ci = new ConceptInfo(); + // ci.setConceptId(cr.getConceptID()); + // ci.setDepth(r.getRank()); + // ci.setIntrinsicIC(r.getEvaluation()); + // conceptInfoMap.put(ci.getConceptId(), ci); + // } + // } + // fill corpusIC + log.info("loading corpus infocontent for corpusName=" + corpusName + + ", conceptGraphName=" + conceptGraphName + + ", conceptSetName=" + conceptSetName); + Map corpusICMap = classifierEvaluationDao + .getInfoContent(corpusName, conceptGraphName, + this.conceptSetName); + if (corpusICMap == null || corpusICMap.isEmpty()) { + log.warn("IC not found"); + } + ImmutableMap.Builder mb = new ImmutableMap.Builder(); + for (Map.Entry corpusICEntry : corpusICMap.entrySet()) { + ConcRel cr = cg.getConceptMap().get(corpusICEntry.getKey()); + if (cr != null) { + mb.put(cr.getConceptID(), corpusICEntry.getValue()); + } + } + this.corpusICMap = mb.build(); + // ConceptInfo ci = this.conceptInfoCache[cr.getNodeIndex()]; + // if (ci == null) { + // // this shouldn't happen! 
there should be intrinsic ic for + // // this concept + // ci = new ConceptInfo(); + // ci.setConceptId(cr.getConceptID()); + // this.conceptInfoCache[cr.getNodeIndex()] = ci; + // } + // ci.setCorpusIC(corpusICEntry.getValue()); + // } + // } + } + + /** + * initialize the metrics + */ + private void initSimilarityMetricMap() { + log.info("initializing similarity measures"); + // Double maxIC = this.classifierEvaluationDao.getMaxFeatureEvaluation( + // null, null, null, + // IntrinsicInfoContentEvaluator.INTRINSIC_INFOCONTENT, 0, 0, + // conceptGraphName); + // Integer maxDepth = this.classifierEvaluationDao + // .getMaxDepth(conceptGraphName); + double maxIC = this.cg.getIntrinsicICMax(); + int maxDepth = this.cg.getDepthMax(); + this.similarityMetricMap = new HashMap( + SimilarityMetricEnum.values().length); + if (maxDepth > 0) { + this.similarityMetricMap.put(SimilarityMetricEnum.LCH, + new LCHMetric(this, maxDepth)); + this.similarityMetricMap.put(SimilarityMetricEnum.LIN, + new LinMetric(this, false)); + this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_LIN, + new LinMetric(this, true)); + this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_LCH, + new IntrinsicLCHMetric(this, maxIC)); + this.similarityMetricMap.put(SimilarityMetricEnum.PATH, + new PathMetric(this)); + this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_PATH, + new IntrinsicPathMetric(this, maxIC)); + this.similarityMetricMap.put(SimilarityMetricEnum.RADA, + new RadaMetric(this, maxDepth)); + this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_RADA, + new IntrinsicRadaMetric(this, maxIC)); + this.similarityMetricMap.put(SimilarityMetricEnum.SOKAL, + new SokalSneathMetric(this)); + this.similarityMetricMap.put(SimilarityMetricEnum.JACCARD, + new JaccardMetric(this)); + this.similarityMetricMap.put(SimilarityMetricEnum.WUPALMER, + new WuPalmerMetric(this)); + } else { + this.similarityMetricMap.put(SimilarityMetricEnum.PAGERANK, + new PageRankMetric(this, 
this.getPageRankService())); + } + } + + public boolean isPreload() { + return preload; + } + + // /* + // * (non-Javadoc) + // * + // * @see org.apache.ctakes.ytex.kernel.ConceptSimilarity#lch(java.lang.String, + // * java.lang.String) + // */ + // public double lch(String concept1, String concept2) { + // double dm = 2 * cg.getDepthMax() + 1.0; + // ConcRel cr1 = cg.getConceptMap().get(concept1); + // ConcRel cr2 = cg.getConceptMap().get(concept2); + // if (cr1 != null && cr2 != null) { + // Set lcses = new HashSet(); + // int lcsDist = getLCSFromCache(cr1, cr2, lcses); + // // leacock is defined as -log([path length]/(2*[depth]) + // double lch = -Math.log(((double) lcsDist + 1.0) / dm); + // // scale to depth + // return lch / Math.log(dm); + // } else { + // if (log.isDebugEnabled()) { + // if (cr1 == null) + // log.debug("could not find concept:" + concept1); + // if (cr2 == null) + // log.debug("could not find concept:" + concept2); + // } + // return 0; + // } + // } + + public int lcs(String concept1, String concept2, List lcsPaths) { + ConcRel cr1 = cg.getConceptMap().get(concept1); + ConcRel cr2 = cg.getConceptMap().get(concept2); + int dist = -1; + if (cr1 != null && cr2 != null) { + Set crlcses = new HashSet(); + Map crpaths = new HashMap(); + dist = ConcRel.getLeastCommonConcept(cr1, cr2, crlcses, crpaths); + lcsPaths.addAll(crpaths.values()); + } + return dist; + } + + // public double lin(String concept1, String concept2) { + // return filteredLin(concept1, concept2, null); + // } + + /** + * For the given label and cutoff, get the corresponding concepts whose + * propagated ig meets the threshold. 
Used by lin kernel to find concepts + * that actually have a non-trivial similarity + * + * @param label + * label + * @param rankCutoff + * cutoff + * @param conceptFilter + * set to fill with concepts + * @return double minimum evaluation + */ + @Override + public double loadConceptFilter(String label, int rankCutoff, + Map conceptFilter) { + List imputedConcepts = this.classifierEvaluationDao + .getImputedFeaturesByPropagatedCutoff(corpusName, + conceptSetName, label, lcsImputedType + + ImputedFeatureEvaluator.SUFFIX_IMPUTED, + conceptGraphName, lcsImputedType + + ImputedFeatureEvaluator.SUFFIX_PROP, + rankCutoff); + double minEval = 1d; + for (FeatureRank r : imputedConcepts) { + conceptFilter.put(r.getFeatureName(), r.getEvaluation()); + if (minEval >= r.getEvaluation()) + minEval = r.getEvaluation(); + } + return minEval; + } + + public void setCacheManager(CacheManager cacheManager) { + this.cacheManager = cacheManager; + } + + public void setClassifierEvaluationDao( + ClassifierEvaluationDao classifierEvaluationDao) { + this.classifierEvaluationDao = classifierEvaluationDao; + } + + public void setConceptDao(ConceptDao conceptDao) { + this.conceptDao = conceptDao; + } + + public void setConceptGraphName(String conceptGraphName) { + this.conceptGraphName = conceptGraphName; + } + + public void setConceptSetName(String conceptSetName) { + this.conceptSetName = conceptSetName; + } + + public void setCorpusName(String corpusName) { + this.corpusName = corpusName; + } + + public void setLcsImputedType(String lcsImputedType) { + this.lcsImputedType = lcsImputedType; + } + + // double minEval = 1d; + // List listPropagatedConcepts = classifierEvaluationDao + // .getTopFeatures(corpusName, conceptSetName, label, + // ImputedFeatureEvaluator.MeasureType.INFOGAIN.toString() + // + ImputedFeatureEvaluator.SUFFIX_PROP, 0, 0, + // conceptGraphName, rankCutoff); + // for (FeatureRank r : listPropagatedConcepts) { + // ConcRel cr = 
cg.getConceptMap().get(r.getFeatureName()); + // if (cr != null) { + // addSubtree(conceptFilterSet, cr); + // } + // if (r.getEvaluation() < minEval) + // minEval = r.getEvaluation(); + // } + // return minEval; + // } + // + // /** + // * add all children of parent to conceptSet. Limit only to children that + // * actually appear in the corpus + // * + // * @param conceptSet + // * set of concepts to add ids to + // * @param parent + // * parent which will be added to the conceptSet + // * @param corpusICSet + // * set of concepts and hypernyms contained in corpus + // */ + // private void addSubtree(Map conceptSet, ConcRel parent) { + // if (!conceptSet.containsKey(parent.getConceptID()) + // && conceptFreq.containsKey(parent.getConceptID())) { + // conceptSet.put(parent.getConceptID(), 0d); + // for (ConcRel child : parent.getChildren()) { + // addSubtree(conceptSet, child); + // } + // } + // } + + public void setPageRankService(PageRankService pageRankService) { + this.pageRankService = pageRankService; + } + + public void setPreload(boolean preload) { + this.preload = preload; + } + + public void setSimilarityMetricMap( + Map similarityMetricMap) { + this.similarityMetricMap = similarityMetricMap; + } + + public void setTransactionManager( + PlatformTransactionManager transactionManager) { + this.transactionManager = transactionManager; + } + + @Override + public List similarity( + List conceptPairs, List metrics, + Map conceptFilter, boolean lcs) { + List conceptSimMap = new ArrayList( + conceptPairs.size()); + for (ConceptPair conceptPair : conceptPairs) { + conceptSimMap.add(similarity(metrics, conceptPair.getConcept1(), + conceptPair.getConcept2(), conceptFilter, lcs)); + } + return conceptSimMap; + } + + /** + * + */ + @Override + public ConceptPairSimilarity similarity(List metrics, + String concept1, String concept2, + Map conceptFilter, boolean lcs) { + // allocate simInfo if this isn't provided + SimilarityInfo simInfo = new SimilarityInfo(); + if 
(lcs) + simInfo.setLcsPaths(new ArrayList(1)); + // allocate result map + List similarities = new ArrayList(metrics.size()); + if (cg != null) { + // iterate over metrics, compute, stuff in map + for (SimilarityMetricEnum metric : metrics) { + double sim = this.similarityMetricMap.get(metric).similarity( + concept1, concept2, conceptFilter, simInfo); + similarities.add(sim); + } + } + ConceptPairSimilarity csim = new ConceptPairSimilarity(); + csim.setConceptPair(new ConceptPair(concept1, concept2)); + csim.setSimilarities(similarities); + csim.setSimilarityInfo(simInfo); + return csim; + } + + /** + * convert the list of tuis into a bitset + * + * @param tuis + * @param mapTuiIndex + * @return + */ + private BitSet tuiListToBitset(Set tuis, + SortedMap mapTuiIndex) { + BitSet bs = new BitSet(mapTuiIndex.size()); + for (String tui : tuis) { + bs.set(mapTuiIndex.get(tui)); + } + return bs; + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicLCHMetric.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,38 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.util.Map; + +/** + * compute intrinsic LCH as in eqn 28 from + * http://dx.doi.org/10.1016/j.jbi.2011.03.013 + * + * Scale to unit interval + * + * @author vijay + * + */ +public class IntrinsicLCHMetric extends BaseSimilarityMetric { + double logMaxIC2 = 0d; + + public IntrinsicLCHMetric(ConceptSimilarityService simSvc, Double maxIC) { + super(simSvc); + if (maxIC != null) + this.logMaxIC2 = 
Math.log(2 * maxIC.doubleValue()) + 1d; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + double sim = 0d; + if (logMaxIC2 != 0d) { + double ic1 = simSvc.getIC(concept1, true); + double ic2 = simSvc.getIC(concept2, true); + double lcsIC = initLcsIC(concept1, concept2, conceptFilter, + simInfo, true); + sim = 1 - (Math.log(ic1 + ic2 - 2 * (lcsIC) + 1) / logMaxIC2); + + } + return sim; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicPathMetric.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,35 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.util.Map; + + +/** + * compute Intrinsic path distance. Scale the distance to the unit + * interval using max IC. 
+ * + * @author vijay + * + */ +public class IntrinsicPathMetric extends BaseSimilarityMetric { + Double maxIC; + + public IntrinsicPathMetric(ConceptSimilarityService simSvc, Double maxIC) { + super(simSvc); + this.maxIC = maxIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + if (maxIC == null) + return 0d; + double lcsIC = this.initLcsIC(concept1, concept2, conceptFilter, simInfo, true); + if (lcsIC == 0d) + return 0d; + double ic1 = simSvc.getIC(concept1, true); + double ic2 = simSvc.getIC(concept2, true); + // scale to unit interval + return 1d/(ic1 + ic2 - (2 * lcsIC) + 1); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/IntrinsicRadaMetric.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,37 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.util.Map; + + +/** + * compute Intrinsic rada distance as in eqn 23 from + * http://dx.doi.org/10.1016/j.jbi.2011.03.013. Scale the distance to the unit + * interval using max IC. Convert to similarity metric by taking + * 1-scaled_distance. 
+ * + * @author vijay + * + */ +public class IntrinsicRadaMetric extends BaseSimilarityMetric { + Double maxIC; + + public IntrinsicRadaMetric(ConceptSimilarityService simSvc, Double maxIC) { + super(simSvc); + this.maxIC = maxIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + if (maxIC == null) + return 0d; + double lcsIC = this.initLcsIC(concept1, concept2, conceptFilter, simInfo, true); + if (lcsIC == 0d) + return 0d; + double ic1 = simSvc.getIC(concept1, true); + double ic2 = simSvc.getIC(concept2, true); + // scale to unit interval + return 1d - (ic1 + ic2 - (2 * lcsIC)) / (2 * maxIC); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/JaccardMetric.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,30 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.util.Map; + + +/** + * Jaccard metric as in eqn 13 from http://dx.doi.org/10.1016/j.jbi.2011.03.013 + * + * @author vijay + * + */ +public class JaccardMetric extends BaseSimilarityMetric { + + public JaccardMetric(ConceptSimilarityService simSvc) { + super(simSvc); + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + double lcsIC = this.initLcsIC(concept1, concept2, conceptFilter, + simInfo, true); + if (lcsIC == 0d) + return 0d; + double ic1 = simSvc.getIC(concept1, true); + double ic2 = simSvc.getIC(concept2, true); + return lcsIC / (ic1 + ic2 - lcsIC); 
+ } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/LCHMetric.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,33 @@ +package org.apache.ctakes.ytex.kernel.metric; + +import java.util.Map; + +public class LCHMetric extends BaseSimilarityMetric { + /** + * log(max depth * 2) + */ + double logdm = 0d; + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + if (logdm != 0d) { + initLCSes(concept1, concept2, simInfo); + if (simInfo.getLcsDist() > 0) { + // double lch = logdm - Math.log((double) simInfo.getLcsDist()); + // // scale to depth + // return lch / logdm; + return 1 - (Math.log((double) simInfo.getLcsDist()) / logdm); + } + } + return 0d; + } + + public LCHMetric(ConceptSimilarityService simSvc, Integer maxDepth) { + super(simSvc); + if (maxDepth != null) { + this.logdm = Math.log(2 * maxDepth); + } + } + +}