Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 372CA10EEB for ; Mon, 16 Dec 2013 16:33:59 +0000 (UTC) Received: (qmail 81742 invoked by uid 500); 16 Dec 2013 16:32:53 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 81268 invoked by uid 500); 16 Dec 2013 16:32:22 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 81196 invoked by uid 99); 16 Dec 2013 16:32:18 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 16 Dec 2013 16:32:18 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED,T_FRT_PROFILE2 X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 16 Dec 2013 16:32:14 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 33B912388C6A; Mon, 16 Dec 2013 16:30:59 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1551254 [20/26] - in /ctakes/branches/ytex: ctakes-ytex-res/ ctakes-ytex-res/.settings/ ctakes-ytex-res/src/ ctakes-ytex-res/src/main/ ctakes-ytex-res/src/main/resources/ ctakes-ytex-res/src/main/resources/org/ ctakes-ytex-res/src/main/res... 
Date: Mon, 16 Dec 2013 16:30:40 -0000 To: commits@ctakes.apache.org From: vjapache@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20131216163059.33B912388C6A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ClassifierEvaluationDaoImpl.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ClassifierEvaluationDaoImpl.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ClassifierEvaluationDaoImpl.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ClassifierEvaluationDaoImpl.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,380 @@ +package org.apache.ctakes.ytex.kernel.dao; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.dao.DBUtil; +import org.apache.ctakes.ytex.kernel.InfoContentEvaluator; +import org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluator; +import org.apache.ctakes.ytex.kernel.metric.ConceptInfo; +import org.apache.ctakes.ytex.kernel.model.ClassifierEvaluation; +import org.apache.ctakes.ytex.kernel.model.ClassifierEvaluationIRStat; +import org.apache.ctakes.ytex.kernel.model.ClassifierInstanceEvaluation; +import org.apache.ctakes.ytex.kernel.model.CrossValidationFold; +import org.apache.ctakes.ytex.kernel.model.FeatureEvaluation; +import org.apache.ctakes.ytex.kernel.model.FeatureParentChild; +import org.apache.ctakes.ytex.kernel.model.FeatureRank; +import org.hibernate.Query; +import org.hibernate.SessionFactory; +import org.hibernate.type.Type; + + +public class ClassifierEvaluationDaoImpl implements 
ClassifierEvaluationDao { + private static final Log log = LogFactory + .getLog(ClassifierEvaluationDaoImpl.class); + private SessionFactory sessionFactory; + + public SessionFactory getSessionFactory() { + return sessionFactory; + } + + public void setSessionFactory(SessionFactory sessionFactory) { + this.sessionFactory = sessionFactory; + } + + @SuppressWarnings("unchecked") + @Override + public void deleteCrossValidationFoldByName(String corpusName, + String splitName) { + Query q = this.getSessionFactory().getCurrentSession() + .getNamedQuery("getCrossValidationFoldByName"); + q.setString("corpusName", corpusName); + q.setString("splitName", nullToEmptyString(splitName)); + List folds = q.list(); + for (CrossValidationFold fold : folds) + this.getSessionFactory().getCurrentSession().delete(fold); + } + + @Override + public CrossValidationFold getCrossValidationFold(String corpusName, + String splitName, String label, int run, int fold) { + Query q = this.getSessionFactory().getCurrentSession() + .getNamedQuery("getCrossValidationFold"); + q.setString("corpusName", corpusName); + q.setString("splitName", nullToEmptyString(splitName)); + q.setString("label", nullToEmptyString(label)); + q.setInteger("run", run); + q.setInteger("fold", fold); + return (CrossValidationFold) q.uniqueResult(); + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao#saveClassifierEvaluation(org.apache.ctakes.ytex + * .kernel.model.ClassifierEvaluation) + */ + public void saveClassifierEvaluation(ClassifierEvaluation eval, + Map irClassMap, boolean saveInstanceEval) { + saveClassifierEvaluation(eval, irClassMap, saveInstanceEval, true, null); + } + + public void saveClassifierEvaluation(ClassifierEvaluation eval, + Map irClassMap, boolean saveInstanceEval, + boolean saveIRStats, Integer excludeTargetClassId) { + this.getSessionFactory().getCurrentSession().save(eval); + if (saveIRStats) + this.saveIRStats(eval, irClassMap, 
excludeTargetClassId); + if (saveInstanceEval) { + for (ClassifierInstanceEvaluation instanceEval : eval + .getClassifierInstanceEvaluations().values()) { + this.getSessionFactory().getCurrentSession().save(instanceEval); + } + } + } + + void saveIRStats(ClassifierEvaluation eval, + Map irClassMap, Integer excludeTargetClassId) { + Set classIds = this.getClassIds(eval, excludeTargetClassId); + // setup stats + for (Integer irClassId : classIds) { + String irClass = null; + if (irClassMap != null) + irClass = irClassMap.get(irClassId); + if (irClass == null) + irClass = Integer.toString(irClassId); + ClassifierEvaluationIRStat irStat = calcIRStats(irClass, irClassId, + eval, excludeTargetClassId); + this.getSessionFactory().getCurrentSession().save(irStat); + } + } + + /** + * + * @param irClassId + * the target class id with respect to ir statistics will be + * calculated + * @param eval + * the object to update + * @param excludeTargetClassId + * class id to be excluded from computation of ir stats. 
+ * @return + */ + private ClassifierEvaluationIRStat calcIRStats(String irClass, + Integer irClassId, ClassifierEvaluation eval, + Integer excludeTargetClassId) { + int tp = 0; + int tn = 0; + int fp = 0; + int fn = 0; + for (ClassifierInstanceEvaluation instanceEval : eval + .getClassifierInstanceEvaluations().values()) { + + if (instanceEval.getTargetClassId() != null + && (excludeTargetClassId == null || instanceEval + .getTargetClassId() != excludeTargetClassId + .intValue())) { + if (instanceEval.getTargetClassId() == irClassId) { + if (instanceEval.getPredictedClassId() == instanceEval + .getTargetClassId()) { + tp++; + } else { + fn++; + } + } else { + if (instanceEval.getPredictedClassId() == irClassId) { + fp++; + } else { + tn++; + } + } + } + } + return new ClassifierEvaluationIRStat(eval, null, irClass, irClassId, + tp, tn, fp, fn); + } + + private Set getClassIds(ClassifierEvaluation eval, + Integer excludeTargetClassId) { + Set classIds = new HashSet(); + for (ClassifierInstanceEvaluation instanceEval : eval + .getClassifierInstanceEvaluations().values()) { + classIds.add(instanceEval.getPredictedClassId()); + if (instanceEval.getTargetClassId() != null + && (excludeTargetClassId == null || instanceEval + .getTargetClassId() != excludeTargetClassId + .intValue())) + classIds.add(instanceEval.getTargetClassId()); + } + return classIds; + } + + @Override + public void saveFold(CrossValidationFold fold) { + this.getSessionFactory().getCurrentSession().save(fold); + } + + // @Override + // public void saveInfogain(List foldInfogainList) { + // for(FeatureInfogain ig : foldInfogainList) { + // this.getSessionFactory().getCurrentSession().save(ig); + // } + // } + + @Override + public void saveFeatureEvaluation(FeatureEvaluation featureEvaluation, + List features) { + this.getSessionFactory().getCurrentSession().save(featureEvaluation); + for (FeatureRank r : features) + this.getSessionFactory().getCurrentSession().save(r); + } + + 
@SuppressWarnings("unchecked") + @Override + public void deleteFeatureEvaluationByNameAndType(String corpusName, + String featureSetName, String type) { + Query q = this.getSessionFactory().getCurrentSession() + .getNamedQuery("getFeatureEvaluationByNameAndType"); + q.setString("corpusName", corpusName); + q.setString("featureSetName", nullToEmptyString(featureSetName)); + q.setString("type", type); + for (FeatureEvaluation fe : (List) q.list()) + this.getSessionFactory().getCurrentSession().delete(fe); + } + + @SuppressWarnings("unchecked") + @Override + public List getTopFeatures(String corpusName, + String featureSetName, String label, String evaluationType, + Integer foldId, double param1, String param2, + Integer parentConceptTopThreshold) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, featureSetName, + label, evaluationType, foldId, param1, param2, "getTopFeatures"); + q.setMaxResults(parentConceptTopThreshold); + return q.list(); + } + + @Override + public Double getMaxFeatureEvaluation(String corpusName, + String featureSetName, String label, String evaluationType, + Integer foldId, double param1, String param2) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, featureSetName, + label, evaluationType, foldId, param1, param2, + "getMaxFeatureEvaluation"); + return (Double) q.uniqueResult(); + } + + private Query prepareUniqueFeatureEvalQuery(String corpusName, + String featureSetName, String label, String evaluationType, + Integer foldId, Double param1, String param2, String queryName) { + Query q = this.sessionFactory.getCurrentSession().getNamedQuery( + queryName); + q.setString("corpusName", nullToEmptyString(corpusName)); + q.setString("featureSetName", nullToEmptyString(featureSetName)); + q.setString("label", nullToEmptyString(label)); + q.setString("evaluationType", evaluationType); + q.setDouble("param1", param1 == null ? 
0 : param1); + q.setString("param2", nullToEmptyString(param2)); + q.setInteger("crossValidationFoldId", foldId == null ? 0 : foldId); + return q; + } + + /** + * todo for oracle need to handle empty strings differently + * + * @param param1 + * @return + */ + private String nullToEmptyString(String param1) { + return DBUtil.nullToEmptyString(param1); + } + + @SuppressWarnings("unchecked") + @Override + public List getThresholdFeatures(String corpusName, + String featureSetName, String label, String evaluationType, + Integer foldId, double param1, String param2, + double evaluationThreshold) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, featureSetName, + label, evaluationType, foldId, param1, param2, + "getThresholdFeatures"); + q.setDouble("evaluation", evaluationThreshold); + return q.list(); + } + + @Override + public void deleteFeatureEvaluation(String corpusName, + String featureSetName, String label, String evaluationType, + Integer foldId, Double param1, String param2) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, featureSetName, + label, evaluationType, foldId, param1, param2, + "getFeatureEvaluationByNK"); + FeatureEvaluation fe = (FeatureEvaluation) q.uniqueResult(); + if (fe != null) { + // for some reason this isn't working - execute batch updates + // this.sessionFactory.getCurrentSession().delete(fe); + q = this.sessionFactory.getCurrentSession().getNamedQuery( + "deleteFeatureRank"); + q.setInteger("featureEvaluationId", fe.getFeatureEvaluationId()); + q.executeUpdate(); + q = this.sessionFactory.getCurrentSession().getNamedQuery( + "deleteFeatureEval"); + q.setInteger("featureEvaluationId", fe.getFeatureEvaluationId()); + q.executeUpdate(); + } + } + + public Map getFeatureRanks(Set featureNames, + String corpusName, String featureSetName, String label, + String evaluationType, Integer foldId, double param1, String param2) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, featureSetName, + label, evaluationType, foldId, 
param1, param2, + "getFeatureRankEvaluations"); + q.setParameterList("featureNames", featureNames); + @SuppressWarnings("unchecked") + List featureRanks = q.list(); + Map frMap = new HashMap( + featureRanks.size()); + for (FeatureRank fr : featureRanks) + frMap.put(fr.getFeatureName(), fr); + return frMap; + } + + public Map getFeatureRankEvaluations( + Set featureNames, String corpusName, String featureSetName, + String label, String evaluationType, Integer foldId, double param1, + String param2) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, featureSetName, + label, evaluationType, foldId, param1, param2, + "getFeatureRankEvaluations"); + q.setParameterList("featureNames", featureNames); + List featureRanks = q.list(); + Map evalMap = new HashMap( + featureRanks.size()); + for (FeatureRank fr : featureRanks) + evalMap.put(fr.getFeatureName(), fr.getEvaluation()); + return evalMap; + } + + @Override + public Map getFeatureRankEvaluations(String corpusName, + String featureSetName, String label, String evaluationType, + Integer foldId, double param1, String param2) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, featureSetName, + label, evaluationType, foldId, param1, param2, "getTopFeatures"); + @SuppressWarnings("unchecked") + List listFeatureRank = q.list(); + Map mapFeatureEval = new HashMap( + listFeatureRank.size()); + for (FeatureRank r : listFeatureRank) { + mapFeatureEval.put(r.getFeatureName(), r.getEvaluation()); + } + return mapFeatureEval; + } + + @Override + @SuppressWarnings("unchecked") + public List getCorpusCuiTuis(String corpusName, + String conceptGraphName, String conceptSetName) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, conceptSetName, + null, InfoContentEvaluator.INFOCONTENT, 0, 0d, + conceptGraphName, "getCorpusCuiTuis"); + return q.list(); + } + + @Override + public Map getInfoContent(String corpusName, + String conceptGraphName, String conceptSet) { + return getFeatureRankEvaluations(corpusName, conceptSet, 
null, + InfoContentEvaluator.INFOCONTENT, 0, 0, conceptGraphName); + } + + @Override + public List getIntrinsicInfoContent( + String conceptGraphName) { + Query q = prepareUniqueFeatureEvalQuery(null, null, null, + IntrinsicInfoContentEvaluator.INTRINSIC_INFOCONTENT, null, null, + conceptGraphName, "getIntrinsicInfoContent"); + return (List)q.list(); + } + public Integer getMaxDepth(String conceptGraphName) { + Query q = prepareUniqueFeatureEvalQuery(null, null, null, + IntrinsicInfoContentEvaluator.INTRINSIC_INFOCONTENT, null, null, + conceptGraphName, "getMaxFeatureRank"); + return (Integer)q.uniqueResult(); + } + + @Override + public void saveFeatureParentChild(FeatureParentChild parchd) { + this.sessionFactory.getCurrentSession().save(parchd); + } + + @Override + public List getImputedFeaturesByPropagatedCutoff( + String corpusName, String conceptSetName, String label, + String evaluationType, String conceptGraphName, + String propEvaluationType, int propRankCutoff) { + Query q = prepareUniqueFeatureEvalQuery(corpusName, conceptSetName, + label, evaluationType, 0, 0d, conceptGraphName, + "getImputedFeaturesByPropagatedCutoff"); + q.setInteger("propRankCutoff", propRankCutoff); + q.setString("propEvaluationType", propEvaluationType); + return q.list(); + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDao.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDao.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDao.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDao.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,43 @@ +package org.apache.ctakes.ytex.kernel.dao; + +import java.io.IOException; +import java.util.Set; + +import 
org.apache.ctakes.ytex.kernel.model.ConceptGraph; + + +/** + * create/retrieve concept graphs. store concept graph on file system as they + * can get big (>10MB). This is not a problem for sql server/oracle, but may + * require increasing the max_packet_size on mysql. + * + * @author vijay + * + */ +public interface ConceptDao { + + /** + * retrieve an existing concept graph. + * + * @param name + * name of concept graph. Will retrieve from file system. @see + * #createConceptGraph + * @return + */ + public abstract ConceptGraph getConceptGraph(String name); + + /** + * create the concept graph with specified name using specified query. + * + * @param name + * name of concept graph. will create file + * ${org.apache.ctakes.ytex.conceptGraphDir}/[name].gz + * @param query + * returns 2 string columns, 1st column is the child concept, 2nd + * column is the parent concept. + * @return ConceptGraph the concept graph generated using this query. + */ + public abstract void createConceptGraph(String name, String query, + final boolean checkCycle, final Set forbiddenConcepts) throws IOException; + +} \ No newline at end of file Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,533 @@ +package org.apache.ctakes.ytex.kernel.dao; + +import gnu.trove.set.TIntSet; +import gnu.trove.set.hash.TIntHashSet; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import 
java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.Properties; +import java.util.Set; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import javax.sql.DataSource; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.kernel.FileUtil; +import org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluator; +import org.apache.ctakes.ytex.kernel.KernelContextHolder; +import org.apache.ctakes.ytex.kernel.model.ConcRel; +import org.apache.ctakes.ytex.kernel.model.ConceptGraph; +import org.hibernate.SessionFactory; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.RowCallbackHandler; + + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +public class ConceptDaoImpl implements ConceptDao { + /** + * the default concept id for the root. override with -Dytex.defaultRootId + */ + private static final String DEFAULT_ROOT_ID = "C0000000"; + /** + * ignore forbidden concepts. list Taken from umls-interface. 
f concept is + * one of the following just return #C1274012|Ambiguous concept (inactive + * concept) if($concept=~/C1274012/) { return 1; } #C1274013|Duplicate + * concept (inactive concept) if($concept=~/C1274013/) { return 1; } + * #C1276325|Reason not stated concept (inactive concept) + * if($concept=~/C1276325/) { return 1; } #C1274014|Outdated concept + * (inactive concept) if($concept=~/C1274014/) { return 1; } + * #C1274015|Erroneous concept (inactive concept) if($concept=~/C1274015/) { + * return 1; } #C1274021|Moved elsewhere (inactive concept) + * if($concept=~/C1274021/) { return 1; } #C1443286|unapproved attribute + * if($concept=~/C1443286/) { return 1; } #C1274012|non-current concept - + * ambiguous if($concept=~/C1274012/) { return 1; } #C2733115|limited status + * concept if($concept=~/C2733115/) { return 1; } + */ + private static final String defaultForbiddenConceptArr[] = new String[] { + "C1274012", "C1274013", "C1276325", "C1274014", "C1274015", + "C1274021", "C1443286", "C1274012", "C2733115" }; + private static Set defaultForbiddenConcepts; + private static final Log log = LogFactory.getLog(ConceptDaoImpl.class); + + static { + defaultForbiddenConcepts = new HashSet(); + defaultForbiddenConcepts.addAll(Arrays + .asList(defaultForbiddenConceptArr)); + } + + /** + * create a concept graph. 1st param - name of concept graph. 2nd param - + * query to retrieve parent-child pairs. + * + * @param args + */ + @SuppressWarnings("static-access") + public static void main(String args[]) throws ParseException, IOException { + Options options = new Options(); + options.addOption(OptionBuilder + .withArgName("prop") + .hasArg() + .isRequired() + .withDescription( + "property file with queries and other parameters. 
todo desc") + .create("prop")); + try { + CommandLineParser parser = new GnuParser(); + CommandLine line = parser.parse(options, args); + Properties props = FileUtil.loadProperties( + line.getOptionValue("prop"), true); + String conceptGraphName = props + .getProperty("org.apache.ctakes.ytex.conceptGraphName"); + String conceptGraphQuery = props + .getProperty("org.apache.ctakes.ytex.conceptGraphQuery"); + String strCheckCycle = props.getProperty("org.apache.ctakes.ytex.checkCycle", "true"); + String forbiddenConceptList = props + .getProperty("org.apache.ctakes.ytex.forbiddenConcepts"); + Set forbiddenConcepts; + if (forbiddenConceptList != null) { + forbiddenConcepts = new HashSet(); + forbiddenConcepts.addAll(Arrays.asList(forbiddenConceptList + .split(","))); + } else { + forbiddenConcepts = defaultForbiddenConcepts; + } + boolean checkCycle = true; + if ("false".equalsIgnoreCase(strCheckCycle) + || "no".equalsIgnoreCase(strCheckCycle)) + checkCycle = false; + if (conceptGraphName != null && conceptGraphQuery != null) { + KernelContextHolder + .getApplicationContext() + .getBean(ConceptDao.class) + .createConceptGraph(conceptGraphName, + conceptGraphQuery, checkCycle, + forbiddenConcepts); + } else { + printHelp(options); + } + } catch (ParseException pe) { + printHelp(options); + } + } + + private static void printHelp(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("java " + ConceptDaoImpl.class.getName() + + " generate concept graph", options); + } + + private IntrinsicInfoContentEvaluator intrinsicInfoContentEvaluator; + + private JdbcTemplate jdbcTemplate; + + private SessionFactory sessionFactory; + + private Properties ytexProperties; + + /** + * add the relationship to the concept map + * + * @param conceptMap + * @param conceptIndexMap + * @param conceptList + * @param roots + * @param conceptPair + */ + private void addRelation(ConceptGraph cg, Set roots, + String childCUI, String parentCUI, boolean 
checkCycle, + Set forbiddenConcepts) { + if (forbiddenConcepts.contains(childCUI) + || forbiddenConcepts.contains(parentCUI)) { + // ignore relationships to useless concepts + if (log.isDebugEnabled()) + log.debug("skipping relation because of forbidden concept: par=" + + parentCUI + " child=" + childCUI); + return; + } + // ignore self relations + if (!childCUI.equals(parentCUI)) { + boolean parNull = false; + // get parent from cui map + ConcRel crPar = cg.getConceptMap().get(parentCUI); + if (crPar == null) { + parNull = true; + // parent not in cui map - add it + crPar = cg.addConcept(parentCUI); + // this is a candidate root - add it to the set of roots + roots.add(parentCUI); + } + // get the child cui + ConcRel crChild = cg.getConceptMap().get(childCUI); + // crPar already has crChild, return + if (crChild != null && crPar.getChildren().contains(crChild)) + return; + // avoid cycles - don't add child cui if it is an ancestor + // of the parent. if the child is not yet in the map, then it can't + // possibly induce a cycle. + // if the parent is not yet in the map, it can't induce a cycle + // else check for cycles + // @TODO: this is very inefficient. 
implement feedback arc algo + boolean bCycle = !parNull && crChild != null && checkCycle + && checkCycle(crPar, crChild); + if (bCycle) { + log.warn("skipping relation that induces cycle: par=" + + parentCUI + ", child=" + childCUI); + } else { + if (crChild == null) { + // child not in cui map - add it + crChild = cg.addConcept(childCUI); + } else { + // remove the cui from the list of candidate roots + if (roots.contains(childCUI)) + roots.remove(childCUI); + } + // link child to parent and vice-versa + crPar.getChildren().add(crChild); + crChild.getParents().add(crPar); + } + } + } + + /* + * (non-Javadoc) + * + * @see org.apache.ctakes.ytex.kernel.dao.ConceptDao#createConceptGraph + */ + @Override + public void createConceptGraph(String name, String query, + final boolean checkCycle, final Set forbiddenConcepts) + throws IOException { + ConceptGraph conceptGraph = getConceptGraph(name); + if (conceptGraph != null) { + if (log.isWarnEnabled()) + log.warn("createConceptGraph(): concept graph already exist, exiting"); + } else { + if (log.isInfoEnabled()) + log.info("createConceptGraph(): file not found, initializing concept graph from database."); + // final Map conceptMap = new HashMap(); + // final List conceptList = new ArrayList(); + // final Map conceptIndexMap = new HashMap(); + final ConceptGraph cg = new ConceptGraph(); + final Set roots = new HashSet(); + this.jdbcTemplate.query(query, new RowCallbackHandler() { + int nRowsProcessed = 0; + + @Override + public void processRow(ResultSet rs) throws SQLException { + String child = rs.getString(1); + String parent = rs.getString(2); + addRelation(cg, roots, child, parent, checkCycle, + forbiddenConcepts); + nRowsProcessed++; + if (nRowsProcessed % 10000 == 0) { + log.info("processed " + nRowsProcessed + " edges"); + } + } + }); + // set the root + // if there is only one potential root, use it + // else use a synthetic root and add all the roots as its children + String rootId = null; + if 
(log.isDebugEnabled()) + log.debug("roots: " + roots); + if (roots.size() == 1) { + rootId = roots.iterator().next(); + } else { + rootId = System.getProperty("org.apache.ctakes.ytex.defaultRootId", + DEFAULT_ROOT_ID); + ConcRel crRoot = cg.addConcept(rootId); + for (String crChildId : roots) { + ConcRel crChild = cg.getConceptMap().get(crChildId); + crRoot.getChildren().add(crChild); + crChild.getParents().add(crRoot); + } + } + cg.setRoot(rootId); + // // can't get the maximum depth unless we're sure there are no + // cycles + // if (checkCycle) + // cg.setDepthMax(calculateDepthMax(rootId, cg.getConceptMap())); + if (checkCycle) { + log.info("computing intrinsic info for concept graph: " + name); + this.intrinsicInfoContentEvaluator + .evaluateIntrinsicInfoContent(name, + getConceptGraphDir(), cg); + } + log.info("writing concept graph: " + name); + writeConceptGraph(name, cg); + writeConceptGraphProps(name, query, checkCycle); + } + } + + /* + * (non-Javadoc) + * + * @see org.apache.ctakes.ytex.kernel.dao.ConceptDao#getConceptGraph(java.util.Set) + */ + public ConceptGraph getConceptGraph(String name) { + File f = new File(getConceptGraphFileName(name)); + if (log.isInfoEnabled()) + log.info("getConceptGraph(" + name + + ") initializing concept graph from file: " + f.getPath()); + if (f.exists()) { + if (log.isInfoEnabled()) + log.info("getConceptGraph(" + name + + ") file exists, reading concept graph"); + return initializeConceptGraph(this.readConceptGraph(f)); + } else { + return null; + } + } + + public String getConceptGraphDir() { + String cdir = ytexProperties.getProperty("org.apache.ctakes.ytex.conceptGraphDir"); + if (cdir == null || cdir.length() == 0) { + // see if org.apache.ctakes.ytex home is defined in org.apache.ctakes.ytex properties + String ytexHome = ytexProperties.getProperty("org.apache.ctakes.ytex.home"); + if (ytexHome == null || ytexHome.length() == 0) { + // see if org.apache.ctakes.ytex home is defined in the environment + ytexHome = 
System.getenv().get("YTEX_HOME"); + } + if (ytexHome == null || ytexHome.length() == 0) { + log.warn("none of org.apache.ctakes.ytex.conceptGraphDir, org.apache.ctakes.ytex.home, or YTEX_HOME are defined - assuming conceptGraphDir is ./conceptGraph"); + // default to current directory + ytexHome = "."; + } + cdir = ytexHome + File.separator + "conceptGraph"; + } + return cdir; + } + + private String getConceptGraphFileName(String name) { + return getConceptGraphDir() + File.separator + name + ".gz"; + } + + public DataSource getDataSource(DataSource ds) { + return this.jdbcTemplate.getDataSource(); + } + + public IntrinsicInfoContentEvaluator getIntrinsicInfoContentEvaluator() { + return intrinsicInfoContentEvaluator; + } + + public SessionFactory getSessionFactory() { + return sessionFactory; + } + + public Properties getYtexProperties() { + return ytexProperties; + } + + private boolean checkCycle(ConcRel crPar, ConcRel crChild) { + TIntSet visitedNodes = new TIntHashSet(); + return hasAncestor(crPar, crChild, visitedNodes); + } + + /** + * check cycle. + * + * @param crPar + * parent + * @param crChild + * child that should not be an ancestor of parent + * @param visitedNodes + * nodes we've visited in our search. 
keep track of this to avoid + * visiting the same node multiple times + * @return true if crChild is an ancestor of crPar + */ + private boolean hasAncestor(ConcRel crPar, ConcRel crChild, + TIntSet visitedNodes) { + // see if we've already visited this node - if yes then no need to redo + // this + if (visitedNodes.contains(crPar.getNodeIndex())) + return false; + // see if we're the same + if (crPar.getNodeIndex() == crChild.getNodeIndex()) + return true; + // recurse + for (ConcRel c : crPar.getParents()) { + if (hasAncestor(c, crChild, visitedNodes)) + return true; + } + // add ourselves to the set of visited nodes so we no not to revisit + // this + visitedNodes.add(crPar.getNodeIndex()); + return false; + } + + /** + * replace cui strings in concrel with references to other nodes. initialize + * the concept list + * + * @param cg + * @return + */ + private ConceptGraph initializeConceptGraph(ConceptGraph cg) { + ImmutableMap.Builder mb = new ImmutableMap.Builder(); + for (ConcRel cr : cg.getConceptList()) { + // use adjacency list representation for concept graphs that have + // cycles + if (cg.getDepthMax() > 0) + cr.constructRel(cg.getConceptList()); + mb.put(cr.getConceptID(), cr); + } + cg.setConceptMap(mb.build()); + return cg; + } + + private ConceptGraph readConceptGraph(File file) { + ObjectInputStream is = null; + try { + is = new ObjectInputStream(new BufferedInputStream( + new GZIPInputStream(new FileInputStream(file)))); + return (ConceptGraph) is.readObject(); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } finally { + if (is != null) + try { + is.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + } + + public void setDataSource(DataSource ds) { + this.jdbcTemplate = new JdbcTemplate(ds); + } + + public void setIntrinsicInfoContentEvaluator( + IntrinsicInfoContentEvaluator intrinsicInfoContentEvaluator) { + 
this.intrinsicInfoContentEvaluator = intrinsicInfoContentEvaluator; + } + + // /** + // * get maximum depth of graph. + // * + // * @param roots + // * @param conceptMap + // * @return + // */ + // private int calculateDepthMax(String rootId, Map + // conceptMap) { + // ConcRel crRoot = conceptMap.get(rootId); + // return crRoot.depthMax(); + // } + + public void setSessionFactory(SessionFactory sessionFactory) { + this.sessionFactory = sessionFactory; + } + + public void setYtexProperties(Properties ytexProperties) { + this.ytexProperties = new Properties(ytexProperties); + this.ytexProperties.putAll(System.getProperties()); + } + + // /** + // * add parent to all descendants of crChild + // * + // * @param crPar + // * @param crChild + // * @param ancestorCache + // */ + // private void updateDescendants(Set ancestorsPar, ConcRel + // crChild, + // Map> ancestorCache, int depth) { + // if (ancestorCache != null) { + // Set ancestors = ancestorCache.get(crChild.nodeIndex); + // if (ancestors != null) + // ancestors.addAll(ancestorsPar); + // // recurse + // for (ConcRel crD : crChild.getChildren()) { + // updateDescendants(ancestorsPar, crD, ancestorCache, depth + 1); + // } + // } + // } + + /** + * write the concept graph, create parent directories as required + * + * @param name + * @param cg + */ + private void writeConceptGraph(String name, ConceptGraph cg) { + ObjectOutputStream os = null; + File cgFile = new File(getConceptGraphFileName(name)); + if (!cgFile.getParentFile().exists()) + cgFile.getParentFile().mkdirs(); + try { + os = new ObjectOutputStream(new BufferedOutputStream( + new GZIPOutputStream(new FileOutputStream(cgFile)))); + // replace the writable list with an immutable list + cg.setConceptList(ImmutableList.copyOf(cg.getConceptList())); + os.writeObject(cg); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } finally { + if (os != null) + try { + os.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + 
private void writeConceptGraphProps(String name, String query, + boolean checkCycle) { + File propFile = new File(FileUtil.addFilenameToDir( + this.getConceptGraphDir(), name + ".xml")); + try { + if (!propFile.exists()) { + Properties props = new Properties(); + props.put("org.apache.ctakes.ytex.conceptGraphQuery", query); + props.put("org.apache.ctakes.ytex.conceptGraphName", name); + props.put("org.apache.ctakes.ytex.checkCycle", checkCycle ? "true" : "false"); + OutputStream os = null; + try { + os = new FileOutputStream(propFile); + props.storeToXML(os, "created on " + (new Date())); + } finally { + if (os != null) { + try { + os.close(); + } catch (Exception e) { + } + } + } + } + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDao.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDao.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDao.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDao.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,39 @@ +package org.apache.ctakes.ytex.kernel.dao; + +import java.util.List; + +import org.apache.ctakes.ytex.kernel.model.KernelEvaluation; +import org.apache.ctakes.ytex.kernel.model.KernelEvaluationInstance; + + +public interface KernelEvaluationDao { + + public abstract void storeNorm(KernelEvaluation kernelEvaluation, + long instanceId, double norm); + + public abstract Double getNorm(KernelEvaluation kernelEvaluation, + long instanceId); + + public abstract void storeKernel(KernelEvaluation kernelEvaluation, + long instanceId1, long instanceId2, double kernel); + + public abstract Double 
getKernel(KernelEvaluation kernelEvaluation, + long instanceId1, long instanceId2); + + public List getAllKernelEvaluationsForInstance( + KernelEvaluation kernelEvaluation, long instanceId); + + /** + * store the kernel evaluation if it doesn't exist, else return the existing + * one + * + * @param kernelEvaluation + * @return + */ + public abstract KernelEvaluation storeKernelEval( + KernelEvaluation kernelEvaluation); + + public abstract KernelEvaluation getKernelEval(String name, String experiment, + String label, int foldId, double param1, String param2); + +} \ No newline at end of file Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDaoImpl.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDaoImpl.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDaoImpl.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/KernelEvaluationDaoImpl.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,188 @@ +package org.apache.ctakes.ytex.kernel.dao; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.dao.DBUtil; +import org.apache.ctakes.ytex.kernel.model.KernelEvaluation; +import org.apache.ctakes.ytex.kernel.model.KernelEvaluationInstance; +import org.hibernate.Query; +import org.hibernate.SessionFactory; +import org.springframework.transaction.PlatformTransactionManager; +import org.springframework.transaction.TransactionStatus; +import org.springframework.transaction.support.TransactionCallback; +import org.springframework.transaction.support.TransactionTemplate; + + +public class KernelEvaluationDaoImpl implements KernelEvaluationDao { + private 
SessionFactory sessionFactory; + private static final Log log = LogFactory + .getLog(KernelEvaluationDaoImpl.class); + private PlatformTransactionManager transactionManager; + + public PlatformTransactionManager getTransactionManager() { + return transactionManager; + } + + public void setTransactionManager( + PlatformTransactionManager transactionManager) { + this.transactionManager = transactionManager; + txTemplate = new TransactionTemplate(this.transactionManager); + txTemplate + .setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW); + } + + private TransactionTemplate txTemplate; + + public SessionFactory getSessionFactory() { + return sessionFactory; + } + + public void setSessionFactory(SessionFactory sessionFactory) { + this.sessionFactory = sessionFactory; + } + + /* + * (non-Javadoc) + * + * @see dao.KernelEvaluationDao#storeNorm(java.lang.String, int, double) + */ + public void storeNorm(KernelEvaluation kernelEvaluation, long instanceId, + double norm) { + storeKernel(kernelEvaluation, instanceId, instanceId, norm); + } + + /* + * (non-Javadoc) + * + * @see dao.KernelEvaluationDao#getNorm(java.lang.String, int) + */ + public Double getNorm(KernelEvaluation kernelEvaluation, long instanceId) { + return getKernel(kernelEvaluation, instanceId, instanceId); + } + + /* + * (non-Javadoc) + * + * @see dao.KernelEvaluationDao#storeKernel(java.lang.String, int, int, + * double) + */ + public void storeKernel(KernelEvaluation kernelEvaluation, + long instanceId1, long instanceId2, double kernel) { + long instanceId1s = instanceId1 <= instanceId2 ? instanceId1 + : instanceId2; + long instanceId2s = instanceId1 <= instanceId2 ? 
instanceId2 + : instanceId1; + // don't bother with the delete so we can batch insert the kernel eval + // delete existing norm + // if (getKernel(name, instanceId1, instanceId2) != null) { + // Query q = this.getSessionFactory().getCurrentSession() + // .getNamedQuery("deleteKernelEvaluation"); + // q.setInteger("kernelEvaluationId", + // kernelEvaluation.getKernelEvaluationId()); + // q.setInteger("instanceId1", instanceId1s); + // q.setInteger("instanceId2", instanceId2s); + // q.executeUpdate(); + // if (log.isWarnEnabled()) + // log.warn("replacing kernel, instanceId1: " + instanceId1s + // + ", instanceId2: " + instanceId2s + ", name: " + name); + // } + KernelEvaluationInstance g = new KernelEvaluationInstance( + kernelEvaluation.getKernelEvaluationId(), instanceId1s, + instanceId2s, kernel); + this.getSessionFactory().getCurrentSession().save(g); + } + + /* + * (non-Javadoc) + * + * @see dao.KernelEvaluationDao#getKernel(java.lang.String, int, int) + */ + public Double getKernel(KernelEvaluation kernelEvaluation, + long instanceId1, long instanceId2) { + long instanceId1s = instanceId1 <= instanceId2 ? instanceId1 + : instanceId2; + long instanceId2s = instanceId1 <= instanceId2 ? 
instanceId2 + : instanceId1; + Query q = this.getSessionFactory().getCurrentSession() + .getNamedQuery("getKernelEvaluation"); + q.setCacheable(true); + q.setInteger("kernelEvaluationId", + kernelEvaluation.getKernelEvaluationId()); + q.setLong("instanceId1", instanceId1s); + q.setLong("instanceId2", instanceId2s); + KernelEvaluationInstance g = (KernelEvaluationInstance) q + .uniqueResult(); + if (g != null) { + return g.getSimilarity(); + } else { + return null; + } + } + + @SuppressWarnings("unchecked") + @Override + public List getAllKernelEvaluationsForInstance( + KernelEvaluation kernelEvaluation, long instanceId) { + Query q = this.getSessionFactory().getCurrentSession() + .getNamedQuery("getAllKernelEvaluationsForInstance1"); + q.setInteger("kernelEvaluationId", + kernelEvaluation.getKernelEvaluationId()); + q.setLong("instanceId", instanceId); + List kevals = q.list(); + Query q2 = this.getSessionFactory().getCurrentSession() + .getNamedQuery("getAllKernelEvaluationsForInstance2"); + q2.setInteger("kernelEvaluationId", + kernelEvaluation.getKernelEvaluationId()); + q2.setLong("instanceId", instanceId); + kevals.addAll(q2.list()); + return kevals; + } + + @Override + public KernelEvaluation storeKernelEval( + final KernelEvaluation kernelEvaluation) { + KernelEvaluation kEval = getKernelEval( + kernelEvaluation.getCorpusName(), + kernelEvaluation.getExperiment(), kernelEvaluation.getLabel(), + kernelEvaluation.getFoldId(), kernelEvaluation.getParam1(), + kernelEvaluation.getParam2()); + if (kEval == null) { + txTemplate.execute(new TransactionCallback() { + + @Override + public Object doInTransaction(TransactionStatus txStatus) { + try { + getSessionFactory().getCurrentSession().save( + kernelEvaluation); + } catch (Exception e) { + log.warn("couldn't save kernel evaluation, maybe somebody else did. 
try to retrieve kernel eval"); + if (log.isDebugEnabled()) + log.debug("error saving kernel eval", e); + txStatus.setRollbackOnly(); + } + return null; + } + }); + kEval = getKernelEval(kernelEvaluation.getCorpusName(), + kernelEvaluation.getExperiment(), + kernelEvaluation.getLabel(), kernelEvaluation.getFoldId(), + kernelEvaluation.getParam1(), kernelEvaluation.getParam2()); + } + return kEval; + } + + public KernelEvaluation getKernelEval(String name, String experiment, + String label, int foldId, double param1, String param2) { + Query q = this.getSessionFactory().getCurrentSession() + .getNamedQuery("getKernelEval"); + q.setString("corpusName", name); + q.setString("experiment", DBUtil.nullToEmptyString(experiment)); + q.setString("label", DBUtil.nullToEmptyString(label)); + q.setInteger("foldId", foldId); + q.setDouble("param1", param1); + q.setString("param2", DBUtil.nullToEmptyString(param2)); + return (KernelEvaluation) q.uniqueResult(); + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/SortedSetUserType.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/SortedSetUserType.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/SortedSetUserType.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/SortedSetUserType.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,99 @@ +package org.apache.ctakes.ytex.kernel.dao; + +import java.io.Serializable; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Types; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import 
org.apache.commons.logging.LogFactory; +import org.hibernate.HibernateException; + +public class SortedSetUserType { + private static final Log log = LogFactory.getLog(SortedSetUserType.class); + + public int[] sqlTypes() { + return new int[] { Types.CHAR }; + } + + public Class returnedClass() { + return SortedSet.class; + } + + public boolean equals(Object x, Object y) { + return (x == y) + || (x != null && y != null && java.util.Arrays.equals( + (int[]) x, (int[]) y)); + } + + private String sortedSetToString(SortedSet set) { + StringBuilder b = new StringBuilder(); + Iterator iter = set.iterator(); + while (iter.hasNext()) { + b.append(iter.next()); + if (iter.hasNext()) { + b.append("|"); + } + } + return b.toString(); + } + + private Set stringToSortedSet(String s) { + String[] elements = s.split("\\|"); + SortedSet set = new TreeSet(); + set.addAll(Arrays.asList(elements)); + return set; + } + + public Object nullSafeGet(ResultSet rs, String[] names, Object owner) + throws HibernateException, SQLException { + String s = rs.getString(names[0]); + return stringToSortedSet(s); + } + + @SuppressWarnings("unchecked") + public void nullSafeSet(PreparedStatement st, Object value, int index) + throws HibernateException, SQLException { + st.setString(index, sortedSetToString((SortedSet) value)); + } + + public Object deepCopy(Object value) { + if (value == null) + return null; + + byte[] bytes = (byte[]) value; + byte[] result = new byte[bytes.length]; + System.arraycopy(bytes, 0, result, 0, bytes.length); + + return result; + } + + public boolean isMutable() { + return true; + } + + public Object assemble(Serializable cached, Object owner) + throws HibernateException { + return cached; + } + + public Serializable disassemble(Object value) throws HibernateException { + return (Serializable) value; + } + + public Object replace(Object original, Object target, Object owner) + throws HibernateException { + return original; + } + + public int hashCode(Object x) throws 
HibernateException { + return x.hashCode(); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/StringArrayUserType.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/StringArrayUserType.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/StringArrayUserType.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/StringArrayUserType.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,107 @@ +package org.apache.ctakes.ytex.kernel.dao; + +import java.io.Serializable; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Types; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.hibernate.HibernateException; +import org.hibernate.engine.spi.SessionImplementor; +import org.hibernate.usertype.UserType; + +public class StringArrayUserType implements UserType { + private static final Log log = LogFactory.getLog(StringArrayUserType.class); + + public int[] sqlTypes() { + return new int[] { Types.VARCHAR }; + } + + public Class returnedClass() { + return String[].class; + } + + public boolean equals(Object x, Object y) { + return (x == y) + || (x != null && y != null && java.util.Arrays.equals( + (int[]) x, (int[]) y)); + } + + private String stringArrayToString(String[] set) { + StringBuilder b = new StringBuilder(); + SortedSet s = new TreeSet(); + s.addAll(Arrays.asList(set)); + Iterator iter = s.iterator(); + while (iter.hasNext()) { + b.append(iter.next()); + if (iter.hasNext()) { + b.append("|"); + } + } + return 
b.toString(); + } + + private String[] stringToSortedSet(String s) { + String[] elements = s.split("\\|"); + SortedSet set = new TreeSet(); + List l = new ArrayList(); + l.addAll(Arrays.asList(elements)); + return l.toArray(new String[] {}); + } + + public Object deepCopy(Object value) { + if (value == null) + return null; + + String source[] = (String[]) value; + String copy[] = new String[source.length]; + for (int i = 0; i < source.length; i++) + copy[i] = source[i]; + return copy; + } + + public boolean isMutable() { + return true; + } + + public Object assemble(Serializable cached, Object owner) + throws HibernateException { + return cached; + } + + public Serializable disassemble(Object value) throws HibernateException { + return (Serializable) value; + } + + public Object replace(Object original, Object target, Object owner) + throws HibernateException { + return original; + } + + public int hashCode(Object x) throws HibernateException { + return x.hashCode(); + } + + @Override + public Object nullSafeGet(ResultSet rs, String[] names, + SessionImplementor si, Object owner) throws HibernateException, + SQLException { + String s = rs.getString(names[0]); + return stringToSortedSet(s); + } + + @Override + public void nullSafeSet(PreparedStatement st, Object value, int index, + SessionImplementor arg3) throws HibernateException, SQLException { + st.setString(index, stringArrayToString((String[]) value)); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/AttributeProductKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/AttributeProductKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/AttributeProductKernel.java (added) +++ 
ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/AttributeProductKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,23 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + + +/** + * Expects numeric values as input. Returns the product of the specified values, + * + * @author vijay + * + */ +public class AttributeProductKernel implements Kernel { + + @Override + public double evaluate(Object o1, Object o2) { + double d = 0; + Number num1 = (Number) o1; + Number num2 = (Number) o2; + if (num1 != null && num2 != null) { + d = num1.doubleValue() * num2.doubleValue(); + } + return d; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,84 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import net.sf.ehcache.Cache; +import net.sf.ehcache.CacheManager; +import net.sf.ehcache.Element; + +import org.springframework.beans.factory.InitializingBean; + +/** + * consolidate caching in this class. using AOP is very slow!!! According to the + * profiler, Class.isInterface() (called by AOP) eats up a large chunk of CPU. + * Caching is enabled if the cacheName is specified. + *

+ * By default, we assume that the objects upon which we evaluate the kernel + * support the Comparable interface. If not, set the cacheKeyGenerator to a + * different class (default is SymmetricPairCacheKeyGenerator). + * + * @author vijay + * + */ +public abstract class CacheKernel implements Kernel, InitializingBean { + + private CacheManager cacheManager; + private String cacheName; + private Cache cache; + private CacheKeyGenerator cacheKeyGenerator = new SymmetricPairCacheKeyGenerator(); + + public CacheKeyGenerator getCacheKeyGenerator() { + return cacheKeyGenerator; + } + + public void setCacheKeyGenerator(CacheKeyGenerator cacheKeyGenerator) { + this.cacheKeyGenerator = cacheKeyGenerator; + } + + /** + * @return the cacheManager + */ + public CacheManager getCacheManager() { + return cacheManager; + } + + public String getCacheName() { + return cacheName; + } + + public abstract double innerEvaluate(Object o1, Object o2); + + public double evaluate(Object o1, Object o2) { + double dEval; + if (cache == null) { + dEval = innerEvaluate(o1, o2); + } else { + Object cacheKey = cacheKeyGenerator.getCacheKey(o1, o2); + Element e = this.cache.get(cacheKey); + if (e != null) { + dEval = (Double) e.getValue(); + } else { + dEval = innerEvaluate(o1, o2); + cache.put(new Element(cacheKey, dEval)); + } + } + return dEval; + } + + /** + * @param cacheManager + * the cacheManager to set + */ + public void setCacheManager(CacheManager cacheManager) { + this.cacheManager = cacheManager; + } + + public void setCacheName(String cacheName) { + this.cacheName = cacheName; + } + + @Override + public void afterPropertiesSet() throws Exception { + if (cacheName != null) { + cache = cacheManager.getCache(cacheName); + } + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKeyGenerator.java URL: 
http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKeyGenerator.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKeyGenerator.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CacheKeyGenerator.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,8 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.lang.reflect.Method; + +public interface CacheKeyGenerator { + public Object getCacheKey(Method method, Object[] args); + public Object getCacheKey(Object o1, Object o2); +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ConvolutionKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ConvolutionKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ConvolutionKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/ConvolutionKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,68 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import org.apache.ctakes.ytex.kernel.tree.Node; + +/** + * Apply the delegate kernel to the children of the given nodes. 
+ * + * @author vijay + * + */ +public class ConvolutionKernel implements Kernel { + private Kernel delegateKernel; + private String nodeType; + + public String getNodeType() { + return nodeType; + } + + public void setNodeType(String nodeType) { + this.nodeType = nodeType; + } + + public Kernel getDelegateKernel() { + return delegateKernel; + } + + public void setDelegateKernel(Kernel delegateKernel) { + this.delegateKernel = delegateKernel; + } + + private double pow = 1; + + public double getPow() { + return pow; + } + + public void setPow(double pow) { + this.pow = pow; + } + + /** + * c1 and c2 must be Nodes. if the nodeType field is set, the type of the + * Nodes must match nodeType field for us to evaluate the delegateKernel on + * them. + * + * @return sum( sum( K(child(i),child(j) ) ) ) + */ + public double evaluate(Object c1, Object c2) { + Node n1 = (Node) c1; + Node n2 = (Node) c2; + double d = 0; + for (Node child1 : n1.getChildren()) { + for (Node child2 : n2.getChildren()) { + // if node type specified, they have to match + if (getNodeType() == null + || (getNodeType().equals(child1.getType()) && getNodeType() + .equals(child2.getType()))) { + d += delegateKernel.evaluate(child1, child2); + } + } + } + if (pow > 1) + return Math.pow(d, pow); + else + return d; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluator.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluator.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluator.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluator.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,17 @@ +package 
org.apache.ctakes.ytex.kernel.evaluator; + +import java.util.Map; + +import org.apache.ctakes.ytex.kernel.tree.Node; + + +public interface CorpusKernelEvaluator { + public void evaluateKernelOnCorpus(); + + public void evaluateKernelOnCorpus(Map instanceMap, + int nMod, int nSlice, boolean evalTest); + + public abstract void evaluateKernelOnCorpus( + Map instanceIDMap, int nMod, boolean evalTest) + throws InterruptedException; +} \ No newline at end of file Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluatorImpl.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluatorImpl.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluatorImpl.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/CorpusKernelEvaluatorImpl.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,498 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import javax.sql.DataSource; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import 
org.apache.commons.logging.LogFactory; +import org.apache.ctakes.ytex.dao.DBUtil; +import org.apache.ctakes.ytex.kernel.dao.KernelEvaluationDao; +import org.apache.ctakes.ytex.kernel.model.KernelEvaluation; +import org.apache.ctakes.ytex.kernel.model.KernelEvaluationInstance; +import org.apache.ctakes.ytex.kernel.tree.InstanceTreeBuilder; +import org.apache.ctakes.ytex.kernel.tree.Node; +import org.apache.ctakes.ytex.kernel.tree.TreeMappingInfo; +import org.springframework.context.ApplicationContext; +import org.springframework.context.access.ContextSingletonBeanFactoryLocator; +import org.springframework.context.support.FileSystemXmlApplicationContext; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.RowCallbackHandler; +import org.springframework.jdbc.core.RowMapper; +import org.springframework.jdbc.core.simple.SimpleJdbcTemplate; +import org.springframework.transaction.PlatformTransactionManager; +import org.springframework.transaction.TransactionStatus; +import org.springframework.transaction.support.TransactionCallback; +import org.springframework.transaction.support.TransactionTemplate; + + +public class CorpusKernelEvaluatorImpl implements CorpusKernelEvaluator { + protected class InstanceIDRowMapper implements RowMapper { + + @Override + public Integer mapRow(ResultSet rs, int arg1) throws SQLException { + return rs.getInt(1); + } + + } + + public class SliceEvaluator implements Callable { + Map instanceIDMap; + int nMod; + int nSlice; + boolean evalTest; + + public SliceEvaluator(Map instanceIDMap, int nMod, + int nSlice, boolean evalTest) { + this.nSlice = nSlice; + this.nMod = nMod; + this.instanceIDMap = instanceIDMap; + this.evalTest = evalTest; + } + + @Override + public Object call() throws Exception { + try { + evaluateKernelOnCorpus(instanceIDMap, nMod, nSlice, evalTest); + } catch (Exception e) { + log.error("error on slice: " + nSlice, e); + throw e; + } + return null; + } + } + + private static final Log 
log = LogFactory + .getLog(CorpusKernelEvaluator.class); + + @SuppressWarnings("static-access") + private static Options initOptions() { + Options options = new Options(); + options.addOption(OptionBuilder + .withArgName("classpath*:simSvcBeanRefContext.xml") + .hasArg() + .withDescription( + "use specified beanRefContext.xml, default classpath*:simSvcBeanRefContext.xml") + .create("beanref")); + options.addOption(OptionBuilder + .withArgName("kernelApplicationContext") + .hasArg() + .withDescription( + "use specified applicationContext, default kernelApplicationContext") + .create("appctx")); + options.addOption(OptionBuilder + .withArgName("beans-corpus.xml") + .hasArg() + .withDescription( + "use specified beans.xml, no default. This file is typically required.") + .create("beans")); + options.addOption(OptionBuilder + .withArgName("yes/no") + .hasArg() + .withDescription( + "should test instances be evaluated? default no.") + .create("evalTest")); + options.addOption(OptionBuilder + .withArgName("instanceMap.obj") + .hasArg() + .withDescription( + "load instanceMap from file system instead of from db. Use after storing instance map. If not specified will attempt to load from db.") + .create("loadInstanceMap")); + options.addOption(OptionBuilder + .withDescription( + "for parallelization, split the instances into mod slices") + .hasArg().create("mod")); + options.addOption(OptionBuilder + .withDescription( + "for parallelization, parameter that determines which slice we work on. 
If this is not specified, nMod threads will be started to evaluate all slices in parallel.") + .hasArg().create("slice")); + options.addOption(new Option("help", "print this message")); + return options; + } + + public static void main(String args[]) throws Exception { + Options options = initOptions(); + + if (args.length == 0) { + printHelp(options); + } else { + CommandLineParser parser = new GnuParser(); + try { + // parse the command line arguments + CommandLine line = parser.parse(options, args); + // parse the command line arguments + String beanRefContext = line.getOptionValue("beanref", + "classpath*:simSvcBeanRefContext.xml"); + String contextName = line.getOptionValue("appctx", + "kernelApplicationContext"); + String beans = line.getOptionValue("beans"); + ApplicationContext appCtx = (ApplicationContext) ContextSingletonBeanFactoryLocator + .getInstance(beanRefContext) + .useBeanFactory(contextName).getFactory(); + ApplicationContext appCtxSource = appCtx; + if (beans != null) { + appCtxSource = new FileSystemXmlApplicationContext( + new String[] { beans }, appCtx); + } + evalKernel(appCtxSource, line); + } catch (ParseException e) { + printHelp(options); + throw e; + } + } + } + + private static void evalKernel(ApplicationContext appCtxSource, + CommandLine line) throws Exception { + InstanceTreeBuilder builder = appCtxSource + .getBean(InstanceTreeBuilder.class); + CorpusKernelEvaluator corpusEvaluator = appCtxSource + .getBean(CorpusKernelEvaluator.class); + String loadInstanceMap = line.getOptionValue("loadInstanceMap"); + String strMod = line.getOptionValue("mod"); + String strSlice = line.getOptionValue("slice"); + boolean evalTest = "yes".equalsIgnoreCase(line.getOptionValue( + "evalTest", "no")) + || "true".equalsIgnoreCase(line + .getOptionValue("evalTest", "no")); + int nMod = strMod != null ? 
Integer.parseInt(strMod) : 0; + Integer nSlice = null; + if (nMod == 0) { + nSlice = 0; + } else if (strSlice != null) { + nSlice = Integer.parseInt(strSlice); + } + Map instanceMap = null; + if (loadInstanceMap != null) { + instanceMap = builder.loadInstanceTrees(loadInstanceMap); + } else { + instanceMap = builder.loadInstanceTrees(appCtxSource + .getBean(TreeMappingInfo.class)); + } + if (nSlice != null) { + corpusEvaluator.evaluateKernelOnCorpus(instanceMap, nMod, nSlice, + evalTest); + } else { + corpusEvaluator.evaluateKernelOnCorpus(instanceMap, nMod, evalTest); + } + } + + private static void printHelp(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter + .printHelp( + "java org.apache.ctakes.ytex.kernel.evaluator.CorpusKernelEvaluatorImpl", + options); + } + + private DataSource dataSource; + + private String experiment; + + private int foldId = 0; + + private String instanceIDQuery; + + private Kernel instanceKernel; + + private InstanceTreeBuilder instanceTreeBuilder; + + private JdbcTemplate jdbcTemplate; + + private KernelEvaluationDao kernelEvaluationDao; + + private String label = DBUtil.getEmptyString(); + + private String name; + + private double param1 = 0; + + private String param2 = DBUtil.getEmptyString(); + private SimpleJdbcTemplate simpleJdbcTemplate; + private PlatformTransactionManager transactionManager; + private TreeMappingInfo treeMappingInfo; + private TransactionTemplate txTemplate; + + private void evalInstance(Map instanceIDMap, + KernelEvaluation kernelEvaluation, long instanceId1, + SortedSet rightDocumentIDs) { + if (log.isDebugEnabled()) { + log.debug("left: " + instanceId1 + ", right: " + rightDocumentIDs); + } + for (long instanceId2 : rightDocumentIDs) { + // if (instanceId1 != instanceId2) { + final long i1 = instanceId1; + final long i2 = instanceId2; + final Node root1 = instanceIDMap.get(i1); + final Node root2 = instanceIDMap.get(i2); + if (root1 != null && root2 != null) { + 
kernelEvaluationDao.storeKernel(kernelEvaluation, i1, i2, + instanceKernel.evaluate(root1, root2)); + } + } + } + + @Override + public void evaluateKernelOnCorpus() { + final Map instanceIDMap = instanceTreeBuilder + .loadInstanceTrees(treeMappingInfo); + this.evaluateKernelOnCorpus(instanceIDMap, 0, 0, false); + } + + @Override + public void evaluateKernelOnCorpus(Map instanceIDMap, int nMod, + boolean evalTest) throws InterruptedException { + ExecutorService svc = Executors.newFixedThreadPool(nMod); + List> taskList = new ArrayList>(nMod); + for (int nSlice = 1; nSlice <= nMod; nSlice++) { + taskList.add(new SliceEvaluator(instanceIDMap, nMod, nSlice, + evalTest)); + } + svc.invokeAll(taskList); + svc.shutdown(); + svc.awaitTermination(60 * 4, TimeUnit.MINUTES); + } + + public void evaluateKernelOnCorpus(final Map instanceIDMap, + int nMod, int nSlice, boolean evalTest) { + KernelEvaluation kernelEvaluationTmp = new KernelEvaluation(); + kernelEvaluationTmp.setExperiment(this.getExperiment()); + kernelEvaluationTmp.setFoldId(this.getFoldId()); + kernelEvaluationTmp.setLabel(this.getLabel()); + kernelEvaluationTmp.setCorpusName(this.getName()); + kernelEvaluationTmp.setParam1(getParam1()); + kernelEvaluationTmp.setParam2(getParam2()); + final KernelEvaluation kernelEvaluation = this.kernelEvaluationDao + .storeKernelEval(kernelEvaluationTmp); + final List documentIds = new ArrayList(); + final List testDocumentIds = new ArrayList(); + loadDocumentIds(documentIds, testDocumentIds, instanceIDQuery); + if (!evalTest) { + // throw away the test ids if we're not going to evaluate them + testDocumentIds.clear(); + } + int nStart = 0; + int nEnd = documentIds.size(); + int total = documentIds.size(); + if (nMod > 0) { + nMod = Math.min(total, nMod); + } + if (nMod > 0 && nSlice > nMod) { + log.info("more slices than documents, skipping slice: " + nSlice); + return; + } + if (nMod > 0) { + int sliceSize = total / nMod; + nStart = sliceSize * (nSlice - 1); + if (nSlice != 
nMod) + nEnd = nStart + sliceSize; + } + for (int i = nStart; i < nEnd; i++) { + // left hand side of kernel evaluation + final long instanceId1 = documentIds.get(i); + if (log.isInfoEnabled()) + log.info("evaluating kernel for instance_id1 = " + instanceId1); + // list of instance ids right hand side of kernel evaluation + final SortedSet rightDocumentIDs = new TreeSet( + testDocumentIds); + if (i < documentIds.size()) { + // rightDocumentIDs.addAll(documentIds.subList(i + 1, + // documentIds.size() - 1)); + rightDocumentIDs.addAll(documentIds.subList(i, + documentIds.size())); + } + // remove instances already evaluated + for (KernelEvaluationInstance kEval : this.kernelEvaluationDao + .getAllKernelEvaluationsForInstance(kernelEvaluation, + instanceId1)) { + rightDocumentIDs + .remove(instanceId1 == kEval.getInstanceId1() ? kEval + .getInstanceId2() : kEval.getInstanceId1()); + } + // kernel evaluations for this instance are done in a single tx + // hibernate can batch insert these + txTemplate.execute(new TransactionCallback() { + + @Override + public Object doInTransaction(TransactionStatus arg0) { + evalInstance(instanceIDMap, kernelEvaluation, instanceId1, + rightDocumentIDs); + return null; + } + }); + + } + } + + public DataSource getDataSource() { + return dataSource; + } + + public String getExperiment() { + return experiment; + } + + public int getFoldId() { + return foldId; + } + + public String getInstanceIDQuery() { + return instanceIDQuery; + } + + public Kernel getInstanceKernel() { + return instanceKernel; + } + + public InstanceTreeBuilder getInstanceTreeBuilder() { + return instanceTreeBuilder; + } + + public KernelEvaluationDao getKernelEvaluationDao() { + return kernelEvaluationDao; + } + + public String getLabel() { + return label; + } + + public String getName() { + return name; + } + + public double getParam1() { + return param1; + } + + public String getParam2() { + return param2; + } + + public PlatformTransactionManager 
getTransactionManager() { + return transactionManager; + } + + public TreeMappingInfo getTreeMappingInfo() { + return treeMappingInfo; + } + + /** + * load the document ids from the instanceIDQuery + * + * @param documentIds + * @param testDocumentIds + * @param instanceIDQuery + */ + private void loadDocumentIds(final List documentIds, + final List testDocumentIds, final String instanceIDQuery) { + txTemplate.execute(new TransactionCallback() { + @Override + public List doInTransaction(TransactionStatus arg0) { + jdbcTemplate.query(instanceIDQuery, new RowCallbackHandler() { + Boolean trainFlag = null; + + /** + *
 <ul> + *
 <li>1st column - document id</li> + *
 <li>2nd column - optional - train/test flag (train = 1)</li></ul> + *
+ */ + @Override + public void processRow(ResultSet rs) throws SQLException { + if (trainFlag == null) { + // see how many columns there are + // if we have 2 columsn, then we assume that the 2nd + // column has the train/test flag + // else we assume everything is training data + trainFlag = rs.getMetaData().getColumnCount() == 2; + } + long id = rs.getLong(1); + int train = trainFlag.booleanValue() ? rs.getInt(2) : 1; + if (train != 0) { + documentIds.add(id); + } else { + testDocumentIds.add(id); + } + } + }); + return null; + } + }); + } + + public void setDataSource(DataSource dataSource) { + this.dataSource = dataSource; + this.jdbcTemplate = new JdbcTemplate(dataSource); + this.simpleJdbcTemplate = new SimpleJdbcTemplate(dataSource); + } + + public void setExperiment(String experiment) { + this.experiment = experiment; + } + + public void setFoldId(int foldId) { + this.foldId = foldId; + } + + public void setInstanceIDQuery(String instanceIDQuery) { + this.instanceIDQuery = instanceIDQuery; + } + + public void setInstanceKernel(Kernel instanceKernel) { + this.instanceKernel = instanceKernel; + } + + public void setInstanceTreeBuilder(InstanceTreeBuilder instanceTreeBuilder) { + this.instanceTreeBuilder = instanceTreeBuilder; + } + + public void setKernelEvaluationDao(KernelEvaluationDao kernelEvaluationDao) { + this.kernelEvaluationDao = kernelEvaluationDao; + } + + public void setLabel(String label) { + this.label = label; + } + + public void setName(String name) { + this.name = name; + } + + public void setParam1(double param1) { + this.param1 = param1; + } + + public void setParam2(String param2) { + this.param2 = param2; + } + + public void setTransactionManager( + PlatformTransactionManager transactionManager) { + this.transactionManager = transactionManager; + txTemplate = new TransactionTemplate(this.transactionManager); + txTemplate + .setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW); + } + + public void 
setTreeMappingInfo(TreeMappingInfo treeMappingInfo) { + this.treeMappingInfo = treeMappingInfo; + } +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/EqualityKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/EqualityKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/EqualityKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/EqualityKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,17 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +public class EqualityKernel implements Kernel { + + @Override + public double evaluate(Object o1, Object o2) { + if(o1 == null && o2 == null) + return 1; + else if(o1 == null && o2 != null) + return 0; + else if(o2 == null && o1 != null) + return 0; + else + return o1.equals(o2) ? 
1 : 0; + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/Kernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/Kernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/Kernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/Kernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,5 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +public interface Kernel { + double evaluate(Object o1, Object o2); +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/LinKernel.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/LinKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/LinKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/LinKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,14 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import org.springframework.beans.factory.InitializingBean; + +/* + * @deprecated + */ +public class LinKernel extends SemanticSimKernel implements InitializingBean { + public LinKernel() { + super(); + this.setMetricNames("LIN"); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/MethodCachingInterceptor.java URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/MethodCachingInterceptor.java?rev=1551254&view=auto 
package org.apache.ctakes.ytex.kernel.evaluator;

import java.io.Serializable;

import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;

import org.aopalliance.intercept.MethodInterceptor;
import org.aopalliance.intercept.MethodInvocation;
import org.springframework.beans.factory.InitializingBean;

/**
 * Simple caching interceptor. A cacheName and a cacheKeyGenerator are
 * required. AOP-style configuration is not used because the same classes
 * (kernels) are reused in very different contexts: sometimes we want to cache,
 * sometimes we don't. Therefore, use an old-school ProxyFactoryBean with this
 * interceptor.
 * <p>
 * This turns out to be very slow - a lot of time is spent in AOP-type stuff.
 * This is due to the very high throughput when evaluating kernels.
 * <p>
 * The cache is looked up once, in {@link #afterPropertiesSet()}; the
 * cacheManager and cacheName properties must therefore be set before the bean
 * is initialized.
 *
 * @author vijay
 *
 */
public class MethodCachingInterceptor implements MethodInterceptor,
		InitializingBean {

	private CacheManager cacheManager;
	private String cacheName;
	/** resolved from cacheManager / cacheName in afterPropertiesSet() */
	private Cache cache;
	private CacheKeyGenerator cacheKeyGenerator;
	/**
	 * name of the only method whose results are cached; null means every
	 * intercepted method is cached
	 */
	private String methodName;

	public String getMethodName() {
		return methodName;
	}

	public void setMethodName(String methodName) {
		this.methodName = methodName;
	}

	public CacheKeyGenerator getCacheKeyGenerator() {
		return cacheKeyGenerator;
	}

	public void setCacheKeyGenerator(CacheKeyGenerator cacheKeyGenerator) {
		this.cacheKeyGenerator = cacheKeyGenerator;
	}

	/**
	 * @return the cacheManager
	 */
	public CacheManager getCacheManager() {
		return cacheManager;
	}

	public String getCacheName() {
		return cacheName;
	}

	/**
	 * Return the cached result for the invocation if there is one, otherwise
	 * invoke the target method and cache what it returns.
	 * <p>
	 * Invocations of methods other than {@link #getMethodName()} (when that
	 * property is set) bypass the cache entirely. The value stored in the
	 * cache is cast to {@link Serializable}, so a cached method that returns a
	 * non-serializable object fails with a ClassCastException after the target
	 * method has run.
	 *
	 * @param methodInvocation
	 *            the intercepted invocation
	 * @return the cached or freshly computed return value
	 * @throws Throwable
	 *             whatever the target method throws
	 */
	@Override
	public Object invoke(final MethodInvocation methodInvocation)
			throws Throwable {
		// not the method we were asked to cache - pass straight through
		if (methodName != null
				&& !methodName.equals(methodInvocation.getMethod().getName())) {
			return methodInvocation.proceed();
		}
		final Object cacheKey = this.cacheKeyGenerator.getCacheKey(
				methodInvocation.getMethod(),
				methodInvocation.getArguments());
		final Element cacheElement = cache.get(cacheKey);
		if (cacheElement != null) {
			return cacheElement.getValue();
		}
		final Object methodReturn = methodInvocation.proceed();
		cache.put(new Element(cacheKey, (Serializable) methodReturn));
		return methodReturn;
	}

	/**
	 * @param cacheManager
	 *            the cacheManager to set
	 */
	public void setCacheManager(CacheManager cacheManager) {
		this.cacheManager = cacheManager;
	}

	public void setCacheName(String cacheName) {
		this.cacheName = cacheName;
	}

	/**
	 * Resolve the cache named cacheName from the cacheManager.
	 *
	 * @throws IllegalStateException
	 *             if cacheManager or cacheName is not set, or the cache
	 *             manager has no cache with that name. Failing here reports
	 *             the misconfiguration at startup instead of as a
	 *             NullPointerException on the first intercepted call.
	 */
	@Override
	public void afterPropertiesSet() throws Exception {
		if (cacheManager == null) {
			throw new IllegalStateException("cacheManager must be set");
		}
		if (cacheName == null) {
			throw new IllegalStateException("cacheName must be set");
		}
		cache = cacheManager.getCache(cacheName);
		if (cache == null) {
			throw new IllegalStateException("no cache named '" + cacheName
					+ "' is defined in the cache manager");
		}
	}
}
http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NamedEntityNegationKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NamedEntityNegationKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NamedEntityNegationKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,34 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import org.apache.ctakes.ytex.kernel.tree.Node; + +/** + * Evaluate negation status and certainty of named entities. If negation status + * differs, multiply convolution on concepts by -1. If certainty differs, + * multiply by 0.5. This assumes that possible values for certainty are + * certain/uncertain. + */ +public class NamedEntityNegationKernel extends ConvolutionKernel { + private static final String CONF_ATTR = "confidence"; + private static final String CERT_ATTR = "certainty"; + + @Override + public double evaluate(Object c1, Object c2) { + Node ne1 = (Node) c1; + Node ne2 = (Node) c2; + Number confidence1 = (Number) ne1.getValue().get(CONF_ATTR); + Number confidence2 = (Number) ne2.getValue().get(CONF_ATTR); + Integer certainty1 = (Integer) ne1.getValue().get(CERT_ATTR); + Integer certainty2 = (Integer) ne2.getValue().get(CERT_ATTR); + double negationFactor = 1; + if (confidence1 != null && confidence2 != null + && !confidence1.equals(confidence2)) + negationFactor = -1; + double certaintyFactor = 1; + if (certainty1 != null && certainty1 != null + && !certainty1.equals(certainty2)) + certaintyFactor = 0.5; + return negationFactor * certaintyFactor * super.evaluate(c1, c2); + } + +} Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NodeAttributeKernel.java URL: 
http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NodeAttributeKernel.java?rev=1551254&view=auto ============================================================================== --- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NodeAttributeKernel.java (added) +++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/evaluator/NodeAttributeKernel.java Mon Dec 16 16:30:30 2013 @@ -0,0 +1,45 @@ +package org.apache.ctakes.ytex.kernel.evaluator; + +import org.apache.ctakes.ytex.kernel.tree.Node; + +/** + * Extract a node attribute and run the delegate kernel on the attribute. + * + * @author vijay + * + */ +public class NodeAttributeKernel implements Kernel { + + private Kernel delegateKernel; + private String attributeName; + + public String getAttributeName() { + return attributeName; + } + + public void setAttributeName(String attributeName) { + this.attributeName = attributeName; + } + + public Kernel getDelegateKernel() { + return delegateKernel; + } + + public void setDelegateKernel(Kernel delegateKernel) { + this.delegateKernel = delegateKernel; + } + + @Override + public double evaluate(Object o1, Object o2) { + Node n1 = (Node) o1; + Node n2 = (Node) o2; + if (n1 != null && n2 != null && n1.getType().equals(n2.getType())) { + Object attr1 = n1.getValue().get(attributeName); + Object attr2 = n2.getValue().get(attributeName); + if (attr1 != null && attr1 != null) { + return delegateKernel.evaluate(attr1, attr2); + } + } + return 0; + } +}