ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vjapa...@apache.org
Subject svn commit: r1551254 [17/26] - in /ctakes/branches/ytex: ctakes-ytex-res/ ctakes-ytex-res/.settings/ ctakes-ytex-res/src/ ctakes-ytex-res/src/main/ ctakes-ytex-res/src/main/resources/ ctakes-ytex-res/src/main/resources/org/ ctakes-ytex-res/src/main/res...
Date Mon, 16 Dec 2013 16:30:40 GMT
Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/AbstractBagOfWordsExporter.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/AbstractBagOfWordsExporter.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/AbstractBagOfWordsExporter.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/AbstractBagOfWordsExporter.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,229 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.InvalidPropertiesFormatException;
+import java.util.Properties;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import javax.sql.DataSource;
+
+import org.springframework.jdbc.core.JdbcTemplate;
+import org.springframework.jdbc.core.PreparedStatementCreator;
+import org.springframework.jdbc.core.RowCallbackHandler;
+import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
+import org.springframework.transaction.PlatformTransactionManager;
+import org.springframework.transaction.TransactionStatus;
+import org.springframework.transaction.support.TransactionCallback;
+import org.springframework.transaction.support.TransactionTemplate;
+
+public class AbstractBagOfWordsExporter {
+
+	protected SimpleJdbcTemplate simpleJdbcTemplate;
+	protected JdbcTemplate jdbcTemplate;
+	protected PlatformTransactionManager transactionManager;
+	protected TransactionTemplate txNew;
+
+	public PlatformTransactionManager getTransactionManager() {
+		return transactionManager;
+	}
+
+	public void setTransactionManager(
+			PlatformTransactionManager transactionManager) {
+		this.transactionManager = transactionManager;
+		txNew = new TransactionTemplate(transactionManager);
+		txNew.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW);
+	}
+
+	public AbstractBagOfWordsExporter() {
+		super();
+	}
+
+	public void setDataSource(DataSource ds) {
+		this.jdbcTemplate = new JdbcTemplate(ds);
+		this.simpleJdbcTemplate = new SimpleJdbcTemplate(ds);
+	}
+
+	/**
+	 * Returns the data source backing the JDBC templates.
+	 * 
+	 * @return the data source held by the jdbcTemplate created in
+	 *         {@link #setDataSource(DataSource)}, or <tt>null</tt> if no data
+	 *         source has been set yet
+	 */
+	public DataSource getDataSource() {
+		// jdbcTemplate is only created in setDataSource; avoid a
+		// NullPointerException when the getter is called before that
+		return this.jdbcTemplate == null ? null : this.jdbcTemplate
+				.getDataSource();
+	}
+
+	/**
+	 * Legacy accessor whose parameter was never used. Retained so that
+	 * existing callers keep compiling.
+	 * 
+	 * @param ds
+	 *            ignored
+	 * @return same as {@link #getDataSource()}
+	 * @deprecated use {@link #getDataSource()} instead
+	 */
+	@Deprecated
+	public DataSource getDataSource(DataSource ds) {
+		return getDataSource();
+	}
+
+	/**
+	 * 
+	 * @param sql
+	 *            result 1st column: instance id, 2nd column: word, 3rd column:
+	 *            numeric word value
+	 * @param bagOfWordsData
+	 *            holder whose map of instance id - [map word - word value] is
+	 *            to be populated
+	 */
+	protected void getNumericInstanceWords(final String sql,
+			final BagOfWordsData bagOfWordsData) {
+		txNew.execute(new TransactionCallback<Object>() {
+
+			@Override
+			public Object doInTransaction(TransactionStatus txStatus) {
+				jdbcTemplate.query(new PreparedStatementCreator() {
+
+					@Override
+					public PreparedStatement createPreparedStatement(
+							Connection conn) throws SQLException {
+						return conn.prepareStatement(sql,
+								ResultSet.TYPE_FORWARD_ONLY,
+								ResultSet.CONCUR_READ_ONLY);
+					}
+
+				}, new RowCallbackHandler() {
+
+					@Override
+					public void processRow(ResultSet rs) throws SQLException {
+						int instanceId = rs.getInt(1);
+						String word = rs.getString(2);
+						double wordValue = rs.getDouble(3);
+						addNumericWordToInstance(bagOfWordsData, instanceId,
+								word, wordValue);
+					}
+				});
+				return null;
+			}
+
+		});
+	}
+
+	/**
+	 * Record a numeric word value for an instance and update the document
+	 * length and document-frequency bookkeeping in <tt>bagOfWordsData</tt>.
+	 * 
+	 * @param bagOfWordsData
+	 *            holder that is updated in place
+	 * @param instanceId
+	 *            instance (document) id
+	 * @param word
+	 *            word (attribute name)
+	 * @param wordValue
+	 *            numeric value of the word; it is truncated to an int when
+	 *            added to the document length
+	 */
+	protected void addNumericWordToInstance(BagOfWordsData bagOfWordsData,
+			int instanceId, String word, double wordValue) {
+		// add the numeric word to the map of words for this document
+		SortedMap<String, Double> words = bagOfWordsData
+				.getInstanceNumericWords().get(instanceId);
+		if (words == null) {
+			words = new TreeMap<String, Double>();
+			bagOfWordsData.getInstanceNumericWords().put(instanceId, words);
+		}
+		// put() returns the previous value (never null here, values are boxed
+		// doubles), so null means the document did not have this word yet
+		boolean firstOccurrenceInDoc = words.put(word, wordValue) == null;
+		bagOfWordsData.getNumericWords().add(word);
+		// increment the length of the document by the wordValue
+		Integer docLength = bagOfWordsData.getDocLengthMap().get(instanceId);
+		if (docLength == null) {
+			docLength = 0;
+		}
+		bagOfWordsData.getDocLengthMap().put(instanceId,
+				(docLength + (int) wordValue));
+		// add to the number of docs that have the word - count each document
+		// only once, even if the same instance/word pair is seen again
+		if (firstOccurrenceInDoc) {
+			Integer docsWithWord = bagOfWordsData.getIdfMap().get(word);
+			if (docsWithWord == null) {
+				docsWithWord = 0;
+			}
+			bagOfWordsData.getIdfMap().put(word, docsWithWord + 1);
+		}
+	}
+
+	protected void addNominalWordToInstance(BagOfWordsData bagOfWordsData,
+			int instanceId, String word, String wordValue) {
+		SortedMap<String, String> instanceWords = bagOfWordsData
+				.getInstanceNominalWords().get(instanceId);
+		SortedSet<String> wordValueSet = bagOfWordsData
+				.getNominalWordValueMap().get(word);
+		if (instanceWords == null) {
+			instanceWords = new TreeMap<String, String>();
+			bagOfWordsData.getInstanceNominalWords().put(instanceId,
+					instanceWords);
+		}
+		if (wordValueSet == null) {
+			wordValueSet = new TreeSet<String>();
+			bagOfWordsData.getNominalWordValueMap().put(word, wordValueSet);
+		}
+		// add the word-value for the instance
+		instanceWords.put(word, wordValue);
+		// add the value to the set of valid values
+		wordValueSet.add(wordValue);
+	}
+
+	/**
+	 * 
+	 * @param sql
+	 *            result set has 3 columns. 1st column - integer - instance id.
+	 *            2nd column - word. 3rd column - word value.
+	 * @param bagOfWordsData
+	 *            holder to populate with the results of the query: its map of
+	 *            instance id to word-word value, and its map of word to valid
+	 *            values for the word.
+	 */
+	protected void getNominalInstanceWords(final String sql,
+			final BagOfWordsData bagOfWordsData) {
+		txNew.execute(new TransactionCallback<Object>() {
+
+			@Override
+			public Object doInTransaction(TransactionStatus txStatus) {
+				jdbcTemplate.query(new PreparedStatementCreator() {
+
+					@Override
+					public PreparedStatement createPreparedStatement(
+							Connection conn) throws SQLException {
+						return conn.prepareStatement(sql,
+								ResultSet.TYPE_FORWARD_ONLY,
+								ResultSet.CONCUR_READ_ONLY);
+					}
+
+				}, new RowCallbackHandler() {
+
+					@Override
+					public void processRow(ResultSet rs) throws SQLException {
+						int instanceId = rs.getInt(1);
+						String word = rs.getString(2);
+						String wordValue = rs.getString(3);
+						addNominalWordToInstance(bagOfWordsData, instanceId,
+								word, wordValue);
+					}
+				});
+				return null;
+			}
+		});
+	}
+
+	protected void loadProperties(String propertyFile, Properties props)
+			throws FileNotFoundException, IOException,
+			InvalidPropertiesFormatException {
+		InputStream in = null;
+		try {
+			in = new FileInputStream(propertyFile);
+			if (propertyFile.endsWith(".xml"))
+				props.loadFromXML(in);
+			else
+				props.load(in);
+		} finally {
+			if (in != null) {
+				in.close();
+			}
+		}
+	}
+
+	/**
+	 * Populate <tt>bagOfWordsData</tt> by running the numeric word query.
+	 * 
+	 * @param bagOfWordsData
+	 *            holder to populate
+	 * @param instanceNumericWordQuery
+	 *            query passed to getNumericInstanceWords; skipped if null or
+	 *            blank
+	 * @param instanceNominalWordQuery
+	 *            currently ignored - the code that used it is disabled below
+	 * @param bDecorator
+	 *            currently ignored - the code that used it is disabled below
+	 */
+	protected void loadData(BagOfWordsData bagOfWordsData,
+			String instanceNumericWordQuery, String instanceNominalWordQuery,
+			BagOfWordsDecorator bDecorator) {
+		// treat a missing (null) query like an empty one instead of failing
+		// with a NullPointerException
+		if (instanceNumericWordQuery != null
+				&& instanceNumericWordQuery.trim().length() > 0)
+			this.getNumericInstanceWords(instanceNumericWordQuery,
+					bagOfWordsData);
+		// added to support adding gram matrix index in GramMatrixExporter
+		// TODO fix this
+		// currently not using weka gram matrix
+//		if (bDecorator != null)
+//			bDecorator.decorateNumericInstanceWords(
+//					bagOfWordsData.getInstanceNumericWords(),
+//					bagOfWordsData.getNumericWords());
+//		if (instanceNominalWordQuery.trim().length() > 0)
+//			this.getNominalInstanceWords(instanceNominalWordQuery,
+//					bagOfWordsData);
+//		if (bDecorator != null)
+//			bDecorator.decorateNominalInstanceWords(
+//					bagOfWordsData.getInstanceNominalWords(),
+//					bagOfWordsData.getNominalWordValueMap());
+	}
+}
\ No newline at end of file

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsData.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsData.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsData.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsData.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,130 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/**
+ * Data structure populated by AbstractBagOfWordsExporter that has all the
+ * instance attributes needed for exporting to various formats.
+ * 
+ * @author vijay
+ * 
+ */
+public class BagOfWordsData {
+	/**
+	 * should we perform tf-idf normalization?
+	 */
+	boolean tfIdf;
+	/**
+	 * Map of instance id to class label
+	 */
+	Map<Integer, String> documentClasses = new HashMap<Integer, String>();
+	/**
+	 * class labels
+	 */
+	SortedSet<String> classes = new TreeSet<String>();
+	/**
+	 * numeric attribute labels
+	 */
+	SortedSet<String> numericWords = new TreeSet<String>();
+	/**
+	 * map of instance id to map of attribute name - value pairs
+	 */
+	Map<Integer, SortedMap<String, Double>> instanceNumericWords = new HashMap<Integer, SortedMap<String, Double>>();
+	/**
+	 * instance nominal attribute values
+	 */
+	Map<Integer, SortedMap<String, String>> instanceNominalWords = new HashMap<Integer, SortedMap<String, String>>();
+	/**
+	 * nominal attribute names and values
+	 */
+	SortedMap<String, SortedSet<String>> nominalWordValueMap = new TreeMap<String, SortedSet<String>>();
+	/**
+	 * for tf-idf, length of each instance
+	 */
+	Map<Integer, Integer> docLengthMap = new HashMap<Integer, Integer>();
+	/**
+	 * for tf-idf, term-document count map
+	 */
+	Map<String, Integer> idfMap = new HashMap<String, Integer>();
+
+	public Map<Integer, String> getDocumentClasses() {
+		return documentClasses;
+	}
+
+	public void setDocumentClasses(Map<Integer, String> documentClasses) {
+		this.documentClasses = documentClasses;
+	}
+
+	public SortedSet<String> getClasses() {
+		return classes;
+	}
+
+	public void setClasses(SortedSet<String> classes) {
+		this.classes = classes;
+	}
+
+	public SortedSet<String> getNumericWords() {
+		return numericWords;
+	}
+
+	public void setNumericWords(SortedSet<String> numericWords) {
+		this.numericWords = numericWords;
+	}
+
+	public Map<Integer, SortedMap<String, Double>> getInstanceNumericWords() {
+		return instanceNumericWords;
+	}
+
+	public void setInstanceNumericWords(
+			Map<Integer, SortedMap<String, Double>> instanceNumericWords) {
+		this.instanceNumericWords = instanceNumericWords;
+	}
+
+	public Map<Integer, SortedMap<String, String>> getInstanceNominalWords() {
+		return instanceNominalWords;
+	}
+
+	public void setInstanceNominalWords(
+			Map<Integer, SortedMap<String, String>> instanceNominalWords) {
+		this.instanceNominalWords = instanceNominalWords;
+	}
+
+	public SortedMap<String, SortedSet<String>> getNominalWordValueMap() {
+		return nominalWordValueMap;
+	}
+
+	public void setNominalWordValueMap(
+			SortedMap<String, SortedSet<String>> nominalWordValueMap) {
+		this.nominalWordValueMap = nominalWordValueMap;
+	}
+
+	public Map<Integer, Integer> getDocLengthMap() {
+		return docLengthMap;
+	}
+
+	public void setDocLengthMap(Map<Integer, Integer> docLengthMap) {
+		this.docLengthMap = docLengthMap;
+	}
+
+	public Map<String, Integer> getIdfMap() {
+		return idfMap;
+	}
+
+	public void setIdfMap(Map<String, Integer> idfMap) {
+		this.idfMap = idfMap;
+	}
+
+	public boolean isTfIdf() {
+		return tfIdf;
+	}
+
+	public void setTfIdf(boolean tfIdf) {
+		this.tfIdf = tfIdf;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsDecorator.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsDecorator.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsDecorator.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsDecorator.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,22 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.SortedSet;
+
+/**
+ * Classes that delegate to the BagOfWordsExporter can pass this decorator in to
+ * add additional attributes
+ * 
+ * @author vijay
+ * 
+ */
+public interface BagOfWordsDecorator {
+	public void decorateNumericInstanceWords(
+			Map<Long, SortedMap<String, Double>> instanceNumericWords,
+			SortedSet<String> numericWords);
+
+	public void decorateNominalInstanceWords(
+			Map<Long, SortedMap<String, String>> instanceNominalWords,
+			Map<String, SortedSet<String>> nominalWordValueMap);
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsExporter.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsExporter.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsExporter.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BagOfWordsExporter.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,25 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.IOException;
+
+public interface BagOfWordsExporter {
+
+	/**
+	 * 
+	 * @param propertyFile
+	 *            .xml/.properties file with following properties:
+	 *            <ul>
+	 *            <li>
+	 *            arffRelation (see exportBagOfWords)
+	 *            <li>instanceClassQuery (see exportBagOfWords)
+	 *            <li>
+	 *            numericWordQuery (see exportBagOfWords)
+	 *            <li>nominalWordQuery (see exportBagOfWords)
+	 *            <li>arffFile file name to write arff file to
+	 *            </ul>
+	 * @throws IOException
+	 */
+	public abstract void exportBagOfWords(String propertyFile)
+			throws IOException;
+
+}
\ No newline at end of file

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseClassifierEvaluationParser.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseClassifierEvaluationParser.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseClassifierEvaluationParser.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseClassifierEvaluationParser.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,376 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
+import org.apache.ctakes.ytex.kernel.model.ClassifierEvaluation;
+import org.apache.ctakes.ytex.kernel.model.ClassifierInstanceEvaluation;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+
+
+/**
+ * miscellaneous methods used for parsing various output types
+ * 
+ * @author vhacongarlav
+ * 
+ */
+public abstract class BaseClassifierEvaluationParser implements
+		ClassifierEvaluationParser {
+	private static final Log log = LogFactory
+			.getLog(BaseClassifierEvaluationParser.class);
+
+	public static Pattern wsPattern = Pattern.compile("\\s|\\z");
+	public static Pattern wsDotPattern = Pattern.compile("\\s|\\.|\\z");
+
+	private ClassifierEvaluationDao classifierEvaluationDao;
+
+	public static class InstanceClassInfo {
+		long instanceId;
+		boolean train;
+		String targetClassName;
+
+		public InstanceClassInfo() {
+			super();
+		}
+
+		public InstanceClassInfo(long instanceId, boolean train,
+				String targetClassName) {
+			super();
+			this.instanceId = instanceId;
+			this.train = train;
+			this.targetClassName = targetClassName;
+		}
+
+		public long getInstanceId() {
+			return instanceId;
+		}
+
+		public void setInstanceId(long instanceId) {
+			this.instanceId = instanceId;
+		}
+
+		public boolean isTrain() {
+			return train;
+		}
+
+		public void setTrain(boolean train) {
+			this.train = train;
+		}
+
+		public String getTargetClassName() {
+			return targetClassName;
+		}
+
+		public void setTargetClassName(String targetClassName) {
+			this.targetClassName = targetClassName;
+		}
+	}
+
+	public ClassifierEvaluationDao getClassifierEvaluationDao() {
+		return classifierEvaluationDao;
+	}
+
+	public void setClassifierEvaluationDao(
+			ClassifierEvaluationDao classifierEvaluationDao) {
+		this.classifierEvaluationDao = classifierEvaluationDao;
+	}
+
+	public static String extractFirstToken(String line, Pattern tokDelimPattern) {
+		Matcher wsMatcher = tokDelimPattern.matcher(line);
+		String token = null;
+		if (wsMatcher.find() && wsMatcher.start() > 0) {
+			token = line.substring(0, wsMatcher.start());
+		}
+		return token;
+	}
+
+	/**
+	 * Read instance ids, one per line, from the specified file. Whitespace
+	 * around an id is ignored and blank lines are skipped.
+	 * 
+	 * @param instanceIdFile
+	 *            file with one instance id per line
+	 * @return instance ids in file order, or null if the file was not found
+	 * @throws IOException
+	 *             if reading or closing the file fails
+	 * @throws NumberFormatException
+	 *             if a non-blank line is not a valid long
+	 */
+	public List<Long> parseInstanceIds(String instanceIdFile)
+			throws IOException {
+		BufferedReader instanceIdReader = null;
+		List<Long> instanceIds = new ArrayList<Long>();
+		try {
+			instanceIdReader = new BufferedReader(
+					new FileReader(instanceIdFile));
+			String line = null;
+			while ((line = instanceIdReader.readLine()) != null) {
+				String instanceId = line.trim();
+				// tolerate blank lines and padded ids instead of failing
+				// with a NumberFormatException
+				if (instanceId.length() > 0)
+					instanceIds.add(Long.parseLong(instanceId));
+			}
+			return instanceIds;
+		} catch (FileNotFoundException e) {
+			log.warn(instanceIdFile
+					+ " not available, instance_ids will not be stored");
+			return null;
+		} finally {
+			if (instanceIdReader != null)
+				instanceIdReader.close();
+		}
+	}
+
+	/**
+	 * parse a number out of the libsvm command line that matches the specified
+	 * pattern.
+	 * 
+	 * @param pCost
+	 * @param options
+	 * @return null if option not present
+	 */
+	protected Double parseDoubleOption(Pattern pCost, String options) {
+		Matcher m = pCost.matcher(options);
+		if (m.find()) {
+			String toParse = m.group(1);
+			try {
+				return Double.parseDouble(toParse);
+			} catch (NumberFormatException nfe) {
+				log.warn("could not parse: " + toParse, nfe);
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * 
+	 * parse an integer out of the libsvm command line that matches the
+	 * specified pattern. Behaves like parseDoubleOption: a value that cannot
+	 * be parsed is logged and treated as absent.
+	 * 
+	 * @param pKernel
+	 *            pattern whose first capturing group holds the number
+	 * @param options
+	 *            the command line to search
+	 * @return null if option not present or not a valid integer
+	 */
+	protected Integer parseIntOption(Pattern pKernel, String options) {
+		Matcher m = pKernel.matcher(options);
+		if (m.find()) {
+			String toParse = m.group(1);
+			try {
+				return Integer.parseInt(toParse);
+			} catch (NumberFormatException nfe) {
+				log.warn("could not parse: " + toParse, nfe);
+			}
+		}
+		return null;
+	}
+
+	protected void initClassifierEvaluation(String instanceIdFile,
+			ClassifierEvaluation eval) {
+		eval.setFold(FileUtil.parseFoldFromFileName(instanceIdFile));
+		eval.setRun(FileUtil.parseRunFromFileName(instanceIdFile));
+		eval.setLabel(FileUtil.parseLabelFromFileName(instanceIdFile));
+	}
+
+	protected void initClassifierEvaluationFromProperties(Properties props,
+			ClassifierEvaluation eval) {
+		eval.setName(props.getProperty("kernel.name"));
+		eval.setExperiment(props.getProperty("kernel.experiment"));
+		String strParam1 = props.getProperty("kernel.param1");
+		if (strParam1 != null && strParam1.length() > 0)
+			eval.setParam1(Double.parseDouble(strParam1));
+		eval.setParam2(props.getProperty("kernel.param2"));
+		eval.setOptions(props.getProperty(ParseOption.EVAL_LINE.getOptionKey()));
+	}
+
+	/**
+	 * load properties from <tt>outputDir/options.properties</tt>. returns empty
+	 * properties if the file does not exist
+	 * 
+	 * @param outputDir
+	 * @return
+	 * @throws FileNotFoundException
+	 * @throws IOException
+	 */
+	public Properties loadProps(File outputDir) throws FileNotFoundException,
+			IOException {
+		return FileUtil.loadProperties(outputDir.getPath() + File.separator
+				+ "options.properties", true);
+	}
+
+	protected boolean checkFileRead(String file) {
+		return (new File(file)).canRead();
+	}
+
+	protected String getFileBaseName(Properties kernelProps) {
+		return kernelProps.getProperty(
+				ParseOption.DATA_BASENAME.getOptionKey(),
+				ParseOption.DATA_BASENAME.getDefaultValue());
+	}
+
+	protected void storeSemiSupervised(Properties kernelProps,
+			ClassifierEvaluation ce, BiMap<Integer, String> classIdToNameMap) {
+		boolean storeInstanceEval = YES.equalsIgnoreCase(kernelProps
+				.getProperty(ParseOption.STORE_INSTANCE_EVAL.getOptionKey(),
+						ParseOption.STORE_INSTANCE_EVAL.getDefaultValue()));
+		boolean storeUnlabeled = YES.equalsIgnoreCase(kernelProps.getProperty(
+				ParseOption.STORE_UNLABELED.getOptionKey(),
+				ParseOption.STORE_UNLABELED.getDefaultValue()));
+		boolean storeIR = YES.equalsIgnoreCase(kernelProps.getProperty(
+				ParseOption.STORE_IRSTATS.getOptionKey(),
+				ParseOption.STORE_IRSTATS.getDefaultValue()));
+		// save the classifier evaluation
+		this.getClassifierEvaluationDao().saveClassifierEvaluation(ce, classIdToNameMap,
+				storeInstanceEval || storeUnlabeled, storeIR, 0);
+	}
+
+	/**
+	 * used by semil & svmlin to store semisupervised predictions. these train
+	 * ml and make test predictions in a single step.
+	 * 
+	 * @param ce
+	 *            updated
+	 * @param listClassInfo
+	 *            the class info 0 - instance id, 1 - train/test, 2 - target
+	 *            class id
+	 * @param storeUnlabeled
+	 *            should the unlabeled predictions be stored?
+	 * @param classIds
+	 *            predicted class ids
+	 */
+	protected void updateSemiSupervisedPredictions(ClassifierEvaluation ce,
+			List<List<Long>> listClassInfo, boolean storeUnlabeled,
+			int[] classIds) {
+		for (int i = 0; i < classIds.length; i++) {
+			List<Long> classInfo = listClassInfo.get(i);
+			long instanceId = classInfo.get(0);
+			boolean train = classInfo.get(1) == 1;
+			int targetClassId = classInfo.get(2).intValue();
+			// if we are storing unlabeled instance ids, save this instance
+			// evaluation
+			// else only store it if this is a test instance id - save it
+			if (storeUnlabeled || !train) {
+				ClassifierInstanceEvaluation cie = new ClassifierInstanceEvaluation();
+				cie.setClassifierEvaluation(ce);
+				cie.setInstanceId(instanceId);
+				cie.setPredictedClassId(classIds[i]);
+				if (targetClassId != 0)
+					cie.setTargetClassId(targetClassId);
+				// add the instance eval to the parent
+				ce.getClassifierInstanceEvaluations().put(instanceId, cie);
+			}
+		}
+	}
+
+	/**
+	 * store semisupervised predictions given as class names. Each predicted
+	 * class name and each target class name is translated to a class id via
+	 * <tt>classNameToIdMap</tt>.
+	 * 
+	 * @param ce
+	 *            updated - instance evaluations are added to it
+	 * @param listClassInfo
+	 *            class info per instance, in the same order as
+	 *            <tt>predictedClassNames</tt>
+	 * @param storeUnlabeled
+	 *            should predictions for training instances be stored as well?
+	 * @param predictedClassNames
+	 *            predicted class names
+	 * @param classNameToIdMap
+	 *            map of class name to class id
+	 */
+	protected void updateSemiSupervisedPredictions(ClassifierEvaluation ce,
+			List<InstanceClassInfo> listClassInfo, boolean storeUnlabeled,
+			String[] predictedClassNames, Map<String, Integer> classNameToIdMap) {
+		for (int i = 0; i < predictedClassNames.length; i++) {
+			InstanceClassInfo classInfo = listClassInfo.get(i);
+			boolean train = classInfo.isTrain();
+			// if we are storing unlabeled instance ids, save this instance
+			// evaluation
+			// else only store it if this is a test instance id - save it
+			if (storeUnlabeled || !train) {
+				ClassifierInstanceEvaluation cie = new ClassifierInstanceEvaluation();
+				cie.setClassifierEvaluation(ce);
+				cie.setInstanceId(classInfo.getInstanceId());
+				cie.setPredictedClassId(classNameToIdMap.get(predictedClassNames[i]));
+				// keep the lookup boxed: a target class name that is not in
+				// the map yields null, and unboxing it straight into an int
+				// would throw a NullPointerException
+				Integer targetClassId = classNameToIdMap.get(classInfo.getTargetClassName());
+				if (targetClassId != null && targetClassId.intValue() != 0)
+					cie.setTargetClassId(targetClassId.intValue());
+				// add the instance eval to the parent
+				ce.getClassifierInstanceEvaluations().put(cie.getInstanceId(), cie);
+			}
+		}
+	}
+	
+	/**
+	 * load the class id - class name map from the <tt>class.properties</tt>
+	 * file whose name is obtained from FileUtil.getScopedFileName for the
+	 * given directory and label. Each property key is parsed as the integer
+	 * class id, the property value is the class name.
+	 * 
+	 * @param dataDir
+	 *            directory passed to FileUtil.getScopedFileName
+	 * @param label
+	 *            label passed to FileUtil.getScopedFileName
+	 * @return map of class id to class name; empty if the file does not exist
+	 * @throws IOException
+	 *             if the file exists but cannot be opened or read
+	 */
+	protected BiMap<Integer, String> loadClassIdMap(File dataDir, String label)
+			throws IOException {
+		BiMap<Integer, String> classIndexMap = HashBiMap.create();
+		String filename = FileUtil.getScopedFileName(dataDir.getPath(), label,
+				null, null, "class.properties");
+		File f = new File(filename);
+		if (f.exists()) {
+			BufferedReader r = null;
+			try {
+				r = new BufferedReader(new FileReader(f));
+				Properties props = new Properties();
+				props.load(r);
+				for (String key : props.stringPropertyNames()) {
+					classIndexMap.put(Integer.parseInt(key),
+							props.getProperty(key));
+				}
+			} finally {
+				// r is still null if opening the file failed; closing it
+				// unconditionally would replace the real exception with a
+				// NullPointerException
+				if (r != null) {
+					try {
+						r.close();
+					} catch (IOException e) {
+						// the data has been read at this point, so a failure
+						// to close is reported but not propagated
+						log.warn("error closing " + filename, e);
+					}
+				}
+			}
+		}
+		return classIndexMap;
+	}
+
+	/**
+	 * load instance ids with their train/test flag and target class name.
+	 * Each non-blank line must have 3 whitespace-separated tokens: instance
+	 * id, train flag (non-zero integer = train), target class name.
+	 * 
+	 * @param dataDir
+	 *            not used by this method
+	 * @param classFileName
+	 *            file to read
+	 * @return class info in file order; null if the file was not found or a
+	 *         line does not have exactly 3 tokens
+	 * @throws IOException
+	 *             if reading or closing the file fails
+	 * @throws NumberFormatException
+	 *             if the instance id or train flag is not numeric
+	 */
+	protected List<InstanceClassInfo> loadInstanceClassInfo(File dataDir,
+			String classFileName) throws IOException {
+		List<InstanceClassInfo> listClassInfo = null;
+		// load instance ids and their class ids
+		BufferedReader r = null;
+		try {
+			r = new BufferedReader(new FileReader(classFileName));
+			listClassInfo = new ArrayList<InstanceClassInfo>();
+			String line = null;
+			while ((line = r.readLine()) != null) {
+				String trimmed = line.trim();
+				if (trimmed.length() > 0) {
+					// split on runs of whitespace: splitting the untrimmed
+					// line on single whitespace characters produces empty
+					// tokens for leading or repeated separators
+					String classInfoToks[] = trimmed.split("\\s+");
+					if (classInfoToks.length != 3) {
+						log.error("error parsing line: " + line);
+						return null;
+					}
+					listClassInfo
+							.add(new InstanceClassInfo(Long
+									.parseLong(classInfoToks[0]), Integer
+									.parseInt(classInfoToks[1]) != 0,
+									classInfoToks[2]));
+				}
+			}
+		} catch (FileNotFoundException fe) {
+			log.warn("class.txt file not available: " + classFileName, fe);
+			listClassInfo = null;
+		} finally {
+			if (r != null) {
+				r.close();
+			}
+		}
+		return listClassInfo;
+	}
+
+	/**
+	 * load numeric class info. Each non-blank line must have 3
+	 * whitespace-separated integer tokens, which are returned as a list of 3
+	 * longs per line.
+	 * 
+	 * @param dataDir
+	 *            not used by this method
+	 * @param classFileName
+	 *            file to read
+	 * @return one list of 3 values per non-blank line, in file order; null if
+	 *         the file was not found or a line does not have exactly 3 tokens
+	 * @throws IOException
+	 *             if reading or closing the file fails
+	 * @throws NumberFormatException
+	 *             if a token is not numeric
+	 */
+	protected List<List<Long>> loadClassInfo(File dataDir, String classFileName)
+			throws IOException {
+		List<List<Long>> listClassInfo = null;
+		// load instance ids and their class ids
+		BufferedReader r = null;
+		try {
+			r = new BufferedReader(new FileReader(classFileName));
+			listClassInfo = new ArrayList<List<Long>>();
+			String line = null;
+			while ((line = r.readLine()) != null) {
+				String trimmed = line.trim();
+				if (trimmed.length() > 0) {
+					// split on runs of whitespace so that leading or repeated
+					// separators do not produce empty tokens
+					String classInfoToks[] = trimmed.split("\\s+");
+					// validate the token count before parsing anything
+					if (classInfoToks.length != 3) {
+						log.error("error parsing line: " + line);
+						return null;
+					}
+					List<Long> classInfo = new ArrayList<Long>(3);
+					for (String tok : classInfoToks) {
+						classInfo.add(Long.parseLong(tok));
+					}
+					listClassInfo.add(classInfo);
+				}
+			}
+		} catch (FileNotFoundException fe) {
+			log.warn("class.txt file not available: " + classFileName, fe);
+			listClassInfo = null;
+		} finally {
+			if (r != null) {
+				r.close();
+			}
+		}
+		return listClassInfo;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseSparseDataFormatter.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseSparseDataFormatter.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseSparseDataFormatter.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/BaseSparseDataFormatter.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,261 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.BufferedWriter;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+
+import com.google.common.collect.BiMap;
+
+public abstract class BaseSparseDataFormatter implements SparseDataFormatter {
+	protected KernelUtil kernelUtil;
+
+	/**
+	 * directory to export files to, with trailing separator added on if
+	 * necessary
+	 */
+	protected String outdir = null;
+	/**
+	 * map of numeric attribute - attribute index.
+	 */
+	protected Map<String, Integer> numericAttributeMap = new HashMap<String, Integer>();
+	/**
+	 * map of nominal attribute - [nominal attribute value - attribute index].
+	 */
+	protected Map<String, Map<String, Integer>> nominalAttributeMap = new HashMap<String, Map<String, Integer>>();
+	/**
+	 * map of label - [class name - class index]
+	 */
+	protected Map<String, BiMap<String, Integer>> labelToClassIndexMap = new HashMap<String, BiMap<String, Integer>>();
+	/**
+	 * 1-based attribute index
+	 */
+	protected int maxAttributeIndex = 0;
+	/**
+	 * export properties - properties file that controls what to do for this
+	 * export
+	 */
+	protected Properties exportProperties;
+
+	public BaseSparseDataFormatter(KernelUtil kernelUtil) {
+		this.kernelUtil = kernelUtil;
+	}
+
+	protected void exportAttributeNames(SparseData sparseData, String label,
+			Integer run, Integer fold) throws IOException {
+		// reset attribute name/index state
+		this.nominalAttributeMap.clear();
+		this.numericAttributeMap.clear();
+		this.maxAttributeIndex = 0;
+		// construct file name
+		String filename = FileUtil.getScopedFileName(outdir, label, run, fold,
+				"attributes.txt");
+		BufferedWriter w = null;
+		try {
+			w = new BufferedWriter(new FileWriter(filename));
+			// write attributes
+			exportAttributeNames(w, sparseData);
+		} finally {
+			if (w != null)
+				w.close();
+		}
+	}
+
+	/**
+	 * assign indices to each attribute and write the attribute names to w, one
+	 * line per index assigned. numeric attributes are written first, followed
+	 * by one line (and one index) per value of each nominal attribute. The
+	 * assigned indices are recorded in numericAttributeMap and
+	 * nominalAttributeMap.
+	 * 
+	 * @param w
+	 *            writer the attribute names are written to
+	 * @param sparseData
+	 *            source of the numeric words and the nominal word - value map
+	 * @return value of maxAttributeIndex after all indices have been assigned
+	 * @throws IOException
+	 */
+	protected int exportAttributeNames(BufferedWriter w, SparseData sparseData)
+			throws IOException {
+		// add numeric indices
+		for (String attributeName : sparseData.getNumericWords()) {
+			addNumericAttribute(w, attributeName);
+		}
+		// add nominal indices
+		for (SortedMap.Entry<String, SortedSet<String>> nominalAttribute : sparseData
+				.getNominalWordValueMap().entrySet()) {
+			Map<String, Integer> attrValueIndexMap = new HashMap<String, Integer>(
+					nominalAttribute.getValue().size());
+			for (String attrValue : nominalAttribute.getValue()) {
+				w.write(nominalAttribute.getKey());
+				// the value is only appended to the name when the attribute
+				// has more than one value
+				if (nominalAttribute.getValue().size() > 1) {
+					w.write("\t");
+					w.write(attrValue);
+				}
+				w.write("\n");
+				// each value of a nominal attribute gets its own index
+				attrValueIndexMap.put(attrValue, ++maxAttributeIndex);
+			}
+			nominalAttributeMap.put(nominalAttribute.getKey(),
+					attrValueIndexMap);
+		}
+		return maxAttributeIndex;
+	}
+
+	protected void addNumericAttribute(BufferedWriter w, String attributeName)
+			throws IOException {
+		w.write(attributeName);
+		w.write("\n");
+		numericAttributeMap.put(attributeName, ++maxAttributeIndex);
+	}
+
+	/**
+	 * create a map of attribute index - attribute value for the given instance.
+	 * 
+	 * @param bagOfWordsData
+	 * @param numericAttributeMap
+	 * @param nominalAttributeMap
+	 * @param instanceId
+	 * @return
+	 */
+	protected SortedMap<Integer, Double> getSparseLineValues(
+			SparseData bagOfWordsData,
+			Map<String, Integer> numericAttributeMap,
+			Map<String, Map<String, Integer>> nominalAttributeMap,
+			long instanceId) {
+		SortedMap<Integer, Double> instanceValues = new TreeMap<Integer, Double>();
+		// get numeric values for instance
+		if (bagOfWordsData.getInstanceNumericWords().containsKey(instanceId)) {
+			for (Map.Entry<String, Double> numericValue : bagOfWordsData
+					.getInstanceNumericWords().get(instanceId).entrySet()) {
+				// look up index for attribute and put in map
+				instanceValues.put(
+						numericAttributeMap.get(numericValue.getKey()),
+						numericValue.getValue());
+			}
+		}
+		if (bagOfWordsData.getInstanceNominalWords().containsKey(instanceId)) {
+			for (Map.Entry<String, String> nominalValue : bagOfWordsData
+					.getInstanceNominalWords().get(instanceId).entrySet()) {
+				// look up index for attribute and value and put in map
+				instanceValues.put(
+						nominalAttributeMap.get(nominalValue.getKey()).get(
+								nominalValue.getValue()), 1d);
+			}
+		}
+		return instanceValues;
+	}
+
+	protected void exportSparseRow(SparseData bagOfWordsData, long instanceId,
+			BufferedWriter wData, int row) throws IOException {
+		SortedMap<Integer, Double> instanceValues = getSparseLineValues(
+				bagOfWordsData, numericAttributeMap, nominalAttributeMap,
+				instanceId);
+		// write attributes
+		// add the attributes
+		for (SortedMap.Entry<Integer, Double> instanceValue : instanceValues
+				.entrySet()) {
+			// row = instance number
+			wData.write(Integer.toString(row));
+			wData.write("\t");
+			// column = attribute index
+			wData.write(Integer.toString(instanceValue.getKey()));
+			wData.write("\t");
+			// value = value
+			// TODO fix me!
+			// instance id formatted as double
+			if (instanceValue.getKey() == 1) {
+				wData.write(Long.toString(instanceValue.getValue().longValue()));
+			} else {
+				wData.write(Double.toString(instanceValue.getValue()));
+			}
+			wData.write("\n");
+		}
+	}
+
+	/**
+	 * export sparse matrix data for use in matlab/R. creates _data.txt with
+	 * following columns:
+	 * <ul>
+	 * <li>row (int)
+	 * <li>column (int)
+	 * <li>cell value (double)
+	 * </ul>
+	 * also exports instance data (instance.txt). By default tab delimited
+	 * without header. This can be read as a normal 3-column matrix into
+	 * matlab/R, and then converted into a sparse matrix using
+	 * Matrix::sparseMatrix (R) or sparse (matlab).
+	 */
+	protected void exportSparseMatrix(String filename, SparseData sparseData)
+			throws IOException {
+		BufferedWriter wData = null;
+		try {
+			wData = new BufferedWriter(new FileWriter(filename));
+			int row = 1;
+			for (long instanceId : sparseData.getInstanceIds()) {
+				exportSparseRow(sparseData, instanceId, wData, row);
+				row++;
+			}
+		} finally {
+			if (wData != null)
+				wData.close();
+		}
+	}
+
+	// protected List<Integer> getInstanceIdsForScope(InstanceData
+	// instanceLabel,
+	// String label, Integer run, Integer fold) {
+	// List<Integer> instanceIds = new ArrayList<Integer>();
+	// SortedSet<Long> sortedInstanceIds = new TreeSet<Long>();
+	// if (label == null || label.length() == 0) {
+	// // add all instance ids
+	// for (SortedMap<Integer, SortedMap<Integer, SortedMap<Boolean,
+	// SortedMap<Long, String>>>> runMap : instanceLabel.labelToInstanceMap
+	// .values()) {
+	// for (SortedMap<Integer, SortedMap<Boolean, SortedMap<Long, String>>>
+	// foldMap : runMap
+	// .values()) {
+	// for (SortedMap<Boolean, SortedMap<Long, String>> trainTestFold : foldMap
+	// .values()) {
+	// for (SortedMap<Long, String> trainMap : trainTestFold
+	// .values())
+	// sortedInstanceIds.addAll(trainMap.keySet());
+	// }
+	// }
+	// }
+	// } else if (label != null && label.length() > 0 && run == null) {
+	// // label scope
+	// }
+	// return instanceIds;
+	// }
+
+	/**
+	 * get needed properties out of outdir. convert class names into integers
+	 * for libsvm. attempt to parse the class name into an integer. if this
+	 * fails, use an index that we increment. index corresponds to class name's
+	 * alphabetical order.
+	 */
+	@Override
+	public void initializeExport(InstanceData instanceLabel,
+			Properties properties, SparseData sparseData) throws IOException {
+		this.exportProperties = properties;
+		this.outdir = properties.getProperty("outdir");
+		FileUtil.createOutdir(outdir);
+		kernelUtil.fillLabelToClassToIndexMap(
+				instanceLabel.getLabelToClassMap(), this.labelToClassIndexMap);
+	}
+
+	/**
+	 * add the 'unlabeled' class id to the classIndexMap if it isn't there
+	 * already
+	 */
+	protected void updateLabelClassMapTransductive() {
+		for (Map<String, Integer> classIndexMap : labelToClassIndexMap.values()) {
+			classIndexMap.put("0", 0);
+		}
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvalUtil.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvalUtil.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvalUtil.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvalUtil.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,508 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public class ClassifierEvalUtil {
+	private static final Log log = LogFactory.getLog(ClassifierEvalUtil.class);
+	Properties props;
+
+	public ClassifierEvalUtil(String propFile) throws IOException {
+		if (propFile != null)
+			props = FileUtil.loadProperties(propFile, true);
+		else
+			props = System.getProperties();
+	}
+
+	/**
+	 * @param args
+	 * @throws IOException
+	 */
+	public static void main(String[] args) throws IOException {
+		String propFile = null;
+		if (args.length > 0)
+			propFile = args[0];
+		ClassifierEvalUtil ceUtil = new ClassifierEvalUtil(propFile);
+		ceUtil.generateEvalFiles();
+	}
+
+	/**
+	 * dispatch to the parameter generator for the algorithm named by the
+	 * <tt>kernel.algo</tt> property: semil, svmlight, libsvm or svmlin
+	 * (case-insensitive). Any other value, or a missing property, generates
+	 * nothing and logs a warning.
+	 * 
+	 * @throws IOException
+	 */
+	private void generateEvalFiles() throws IOException {
+		String algo = props.getProperty("kernel.algo");
+		if ("semil".equalsIgnoreCase(algo)) {
+			generateSemilEvalParams();
+		} else if ("svmlight".equalsIgnoreCase(algo)
+				|| "libsvm".equalsIgnoreCase(algo)) {
+			generateSvmEvalParams(algo.toLowerCase());
+		} else if ("svmlin".equalsIgnoreCase(algo)) {
+			generateSvmLinParams(algo.toLowerCase());
+		} else {
+			// make a missing or misspelled kernel.algo visible instead of
+			// silently producing no output
+			log.warn("kernel.algo missing or not supported, no files generated: "
+					+ algo);
+		}
+	}
+
+	private void generateSvmLinParams(String lowerCase) throws IOException {
+		File kernelDataDir = new File(props.getProperty("kernel.data", "."));
+		String weightPropsFile = props.getProperty(
+				"kernel.svmlin.classweights", kernelDataDir
+						+ "/classWeights.properties");
+		if (log.isDebugEnabled()) {
+			log.debug("loading weights from " + weightPropsFile);
+		}
+		Properties weightProps = FileUtil
+				.loadProperties(weightPropsFile, false);
+		if (weightProps == null) {
+			log.warn("could not load weights from file: " + weightPropsFile);
+		}
+		Properties props = new Properties();
+		File[] labelFiles = kernelDataDir.listFiles(new FilenameFilter() {
+			@Override
+			public boolean accept(File dir, String name) {
+				return name.endsWith("code.properties");
+			}
+		});
+		if (labelFiles != null && labelFiles.length > 0) {
+			// iterate over label files
+			for (File labelFile : labelFiles) {
+				populateSvmlinParameters(labelFile, kernelDataDir, weightProps,
+						props);
+				// writeSvmlinEvalFile(labelFile, kernelDataDir);
+			}
+		}
+		writeProps(kernelDataDir + "/parameters.properties", props);
+	}
+
+	private String getSvmlinDataFileForLabel(File labelFile, File kernelDataDir) {
+		String labelFileName = labelFile.getName();
+		String label = FileUtil.parseLabelFromFileName(labelFileName);
+		Integer run = FileUtil.parseRunFromFileName(labelFileName);
+		Integer fold = FileUtil.parseFoldFromFileName(labelFileName);
+		File dataFile = null;
+		// check fold scope
+		if (fold != null && fold.intValue() != 0) {
+			dataFile = new File(FileUtil.getScopedFileName(
+					kernelDataDir.getPath(), label, run, fold, "data.txt"));
+		}
+		// no matches, check label scope
+		if ((dataFile == null || !dataFile.exists()) && label != null
+				&& label.length() > 0) {
+			dataFile = new File(FileUtil.getScopedFileName(
+					kernelDataDir.getPath(), label, null, null, "data.txt"));
+		}
+		// no matches, check unscoped
+		if (dataFile == null || !dataFile.exists()) {
+			dataFile = new File(FileUtil.getScopedFileName(
+					kernelDataDir.getPath(), null, null, null, "data.txt"));
+		}
+		if (dataFile != null && dataFile.exists()) {
+			return dataFile.getName();
+		} else {
+			log.warn("no data files match label file: " + labelFile);
+			return null;
+		}
+	}
+
+	/**
+	 * set following properties in paramProps
+	 * <ul>
+	 * <li>[codeFile basename].dataFile
+	 * <li>[codeFile basename].kernel.evalLines
+	 * <li>[labelFile basename].param.R - for each entry of the code file's
+	 * <tt>codes</tt> property that has a value in weightProps
+	 * </ul>
+	 * nothing is set if no data file matches the code file.
+	 * 
+	 * @param codeFile
+	 *            code properties file; its name minus the .properties suffix
+	 *            is the key prefix
+	 * @param kernelDataDir
+	 *            directory passed on to the data file lookup
+	 * @param weightProps
+	 *            positive class fractions; may be null, in which case no
+	 *            param.R properties are set
+	 * @param paramProps
+	 *            properties to populate
+	 * @throws IOException
+	 */
+	private void populateSvmlinParameters(File codeFile, File kernelDataDir,
+			Properties weightProps, Properties paramProps) throws IOException {
+		// cut off the .properties from the file name - this is the prefix for
+		// the properties
+		String codeFileBasename = codeFile.getName();
+		codeFileBasename = codeFileBasename.substring(0,
+				codeFileBasename.length() - ".properties".length());
+		// determine the scoped data file name
+		String dataFile = getSvmlinDataFileForLabel(codeFile, kernelDataDir);
+		if (dataFile == null) {
+			return;
+		}
+		// the dataFile could be found, set the property
+		paramProps.setProperty(codeFileBasename + ".dataFile", dataFile);
+		// generate the parameter grid. locals are named after the option they
+		// hold; the order of the grid (-A, -W, -U) is unchanged
+		List<String> algos = Arrays.asList(addOptionPrefix(props
+				.getProperty("cv.svmlin.algo").split(","), "-A "));
+		List<String> lambdaW = Arrays.asList(addOptionPrefix(props
+				.getProperty("cv.svmlin.lambdaW").split(","), "-W "));
+		List<String> lambdaU = Arrays.asList(addOptionPrefix(props
+				.getProperty("cv.svmlin.lambdaU").split(","), "-U "));
+		List<String> evalLines = parameterGrid(algos, lambdaW, lambdaU);
+		// set the parameter grid property
+		paramProps.setProperty(codeFileBasename + ".kernel.evalLines",
+				listToString(evalLines));
+		if (weightProps == null) {
+			return;
+		}
+		// determine the positive class fraction for each label file
+		Properties codeProps = FileUtil.loadProperties(
+				codeFile.getAbsolutePath(), false);
+		if (codeProps == null) {
+			// other callers of loadProperties(.., false) in this class check
+			// the result for null; do the same here rather than fail with a
+			// NullPointerException
+			log.warn("could not load codes from file: " + codeFile);
+			return;
+		}
+		// iterate through the code files
+		for (String labelfile : codeProps.getProperty("codes", "").split(",")) {
+			// get the class id for the given label file
+			String className = codeProps.getProperty(labelfile + ".className");
+			// figure out the key to look up the positive class fraction
+			// in the classWeights.properties file
+			// if a label is specified then the key is label[n]_class[m],
+			// else just class[m]
+			String label = FileUtil.parseLabelFromFileName(labelfile);
+			String key = label != null && label.length() > 0 ? "label"
+					+ label + "_" : "";
+			key = key + "class" + className;
+			String posClassFrac = weightProps.getProperty(key);
+			if (posClassFrac != null) {
+				// set the class fraction property
+				// use basename of label file as key prefix
+				paramProps.put(labelfile + ".param.R", posClassFrac);
+			}
+		}
+	}
+	
+
+	// private void writeSvmlinEvalFile(File labelFile, File kernelDataDir)
+	// throws IOException {
+	// String dataFile = getSvmlinDataFileForLabel(labelFile, kernelDataDir);
+	// if (dataFile != null) {
+	// List<String> classFracs = new ArrayList<String>(1);
+	// String posClassFrac = getClassFrac(labelFile);
+	// if (posClassFrac != null) {
+	// classFracs.add("-R " + posClassFrac);
+	// } else {
+	// classFracs.add("");
+	// }
+	// List<String> algos = Arrays.asList(addOptionPrefix(props
+	// .getProperty("cv.svmlin.algo").split(","), "-A "));
+	// List<String> lambdaU = Arrays.asList(addOptionPrefix(props
+	// .getProperty("cv.svmlin.lambdaW").split(","), "-W "));
+	// List<String> lambdaW = Arrays.asList(addOptionPrefix(props
+	// .getProperty("cv.svmlin.lambdaU").split(","), "-U "));
+	// List<String> evalLines = parameterGrid(classFracs, algos, lambdaU,
+	// lambdaW);
+	// Properties props = new Properties();
+	// props.setProperty("kernel.dataFile", dataFile);
+	// props.setProperty("kernel.evalLines", listToString(evalLines));
+	// String evalFile = labelFile.getPath().substring(0,
+	// labelFile.getPath().length() - 3)
+	// + "properties";
+	// writeProps(evalFile, props);
+	// }
+	// }
+
+	// /**
+	// * get the positive class fraction. get this from the
+	// * kernel.classrel.[label] or kernel.classrel property
+	// *
+	// * @param labelFile
+	// * @return class fraction if specified
+	// */
+	// private String getClassFrac(File labelFile) {
+	// String classFrac = null;
+	// String label = FileUtil.parseLabelFromFileName(labelFile.getName());
+	// if (label != null) {
+	// classFrac = props.getProperty("kernel.classrel." + label);
+	// } else {
+	// classFrac = props.getProperty("kernel.classrel");
+	// }
+	// return classFrac;
+	// }
+
+	private void generateSvmEvalParams(String svmType) throws IOException {
+		File kernelDataDir = new File(props.getProperty("kernel.data", "."));
+		File[] trainFiles = kernelDataDir.listFiles(new FilenameFilter() {
+			@Override
+			public boolean accept(File dir, String name) {
+				return name.endsWith("train_data.txt");
+			}
+		});
+		Properties params = new Properties();
+		if (trainFiles != null && trainFiles.length > 0) {
+			// iterate over label files
+			for (File trainFile : trainFiles) {
+				writeSvmEvalFile(params, trainFile, kernelDataDir, svmType);
+			}
+		}
+		writeProps(kernelDataDir + "/parameters.properties", params);
+	}
+
+	/**
+	 * generate parameter grid for a training file. add a property [file base
+	 * name].kernel.evalLines=xxx to params, where the base name is the
+	 * training file name minus its last 4 characters. nothing is added if the
+	 * grid is empty.
+	 * 
+	 * @param params
+	 *            properties to populate
+	 * @param trainFile
+	 *            training data file the grid is generated for
+	 * @param kernelDataDir
+	 *            not referenced by this method
+	 * @param svmType
+	 *            used to look up the weight parameters and the
+	 *            cv.[svmType].train.line property
+	 * @throws IOException
+	 */
+	private void writeSvmEvalFile(Properties params, File trainFile,
+			File kernelDataDir, String svmType) throws IOException {
+		// list to hold the svm command lines
+		List<String> evalLines = new ArrayList<String>();
+		// label-specific weight parameters from a property file
+		List<String> weightParams = getWeightParams(trainFile, svmType);
+		// kernels to test
+		List<String> kernels = Arrays.asList(props.getProperty("kernel.types")
+				.split(","));
+		// cost params
+		List<String> costs = Arrays.asList(addOptionPrefix(
+				props.getProperty("cv.costs").split(","), "-c "));
+		// other general params
+		List<String> libsvmEval = Arrays.asList(props.getProperty(
+				"cv." + svmType + ".train.line", "").split(","));
+		// iterate through kernel types, generate parameter grids
+		for (String kernel : kernels) {
+			List<String> kernelOpts = Arrays.asList(new String[] { "-t "
+					+ kernel });
+			if ("0".equals(kernel) || "4".equals(kernel)) {
+				// linear/custom kernel - just cost & weight param
+				evalLines.addAll(parameterGrid(libsvmEval, kernelOpts, costs,
+						weightParams));
+			} else if ("1".equals(kernel)) {
+				// polynomial kernel - cost & weight & degree param
+				evalLines.addAll(parameterGrid(libsvmEval, kernelOpts, costs,
+						weightParams, Arrays.asList(addOptionPrefix(props
+								.getProperty("cv.poly.degrees").split(","),
+								"-d "))));
+			} else if ("2".equals(kernel) || "3".equals(kernel)) {
+				// rbf/sigmoid kernel - cost & weight & gamma param
+				evalLines.addAll(parameterGrid(libsvmEval, kernelOpts, costs,
+						weightParams, Arrays
+								.asList(addOptionPrefix(
+										props.getProperty("cv.rbf.gammas")
+												.split(","), "-g "))));
+			}
+			// any other kernel type contributes no lines
+		}
+		if (evalLines.size() > 0) {
+			// strip the last 4 characters of the file name to get the key prefix
+			String basename = trainFile.getName().substring(0,
+					trainFile.getName().length() - 4);
+			params.put(basename + ".kernel.evalLines", listToString(evalLines));
+			// String evalFile = trainFile.getPath().substring(0,
+			// trainFile.getPath().length() - 3)
+			// + "properties";
+			// Properties evalProps = new Properties();
+			// evalProps.put("kernel.evalLines", listToString(evalLines));
+			// writeProps(evalFile, evalProps);
+		}
+	}
+
+	private List<String> getWeightParams(File trainFile, String svmType)
+			throws IOException {
+		if ("libsvm".equals(svmType)) {
+			String label = FileUtil.parseLabelFromFileName(trainFile.getName());
+			// default label to 0
+			label = label != null && label.length() > 0 ? label : "0";
+			Properties weightProps = new Properties();
+			weightProps.putAll(props);
+			if (props.getProperty("kernel.classweights") != null) {
+				Properties tmp = FileUtil.loadProperties(
+						props.getProperty("kernel.classweights"), false);
+				if(tmp != null)
+					weightProps.putAll(tmp);
+			}
+			String weights = weightProps.getProperty("kernel.weight."
+					+ label);
+			if (weights != null && weights.length() > 0) {
+				return Arrays.asList(weights.split(","));
+			}
+		}
+		return new ArrayList<String>(0);
+	}
+
+	private void generateSemilEvalParams() throws IOException {
+		File kernelDataDir = new File(props.getProperty("kernel.data", "."));
+		List<String> evalLines = generateSemilEvalLines();
+		File[] labelFiles = kernelDataDir.listFiles(new FilenameFilter() {
+
+			@Override
+			public boolean accept(File dir, String name) {
+				return name.endsWith("label.txt");
+			}
+		});
+		if (labelFiles != null && labelFiles.length > 0) {
+			// iterate over label files
+			for (File labelFile : labelFiles) {
+				List<String> distFiles = getSemilDistFilesForLabel(labelFile,
+						kernelDataDir);
+				if (distFiles != null)
+					writeSemilEvalFile(distFiles, evalLines, labelFile);
+			}
+		}
+	}
+
+	/**
+	 * convert list of strings to comma-delimited string;
+	 * 
+	 * @param listStr
+	 * @return
+	 */
+	private String listToString(List<String> listStr) {
+		StringBuilder b = new StringBuilder();
+		boolean bfirst = true;
+		for (String str : listStr) {
+			if (!bfirst)
+				b.append(",");
+			b.append(str);
+			bfirst = false;
+		}
+		return b.toString();
+	}
+
+	/**
+	 * write file for label
+	 * 
+	 * @param distFiles
+	 * @param evalLines
+	 * @param labelFile
+	 * @throws IOException
+	 */
+	private void writeSemilEvalFile(List<String> distFiles,
+			List<String> evalLines, File labelFile) throws IOException {
+		String labelFileName = labelFile.getPath();
+		String evalFileName = labelFileName.substring(0,
+				labelFileName.length() - 3) + "properties";
+		Properties props = new Properties();
+		props.setProperty("kernel.distFiles", listToString(distFiles));
+		props.setProperty("kernel.evalLines", listToString(evalLines));
+		writeProps(evalFileName, props);
+	}
+
+	private void writeProps(String evalFileName, Properties evalProps)
+			throws IOException {
+		if ("no".equalsIgnoreCase(props.getProperty("kernel.overwriteEvalFile",
+				"yes"))) {
+			File evalFile = new File(evalFileName);
+			if (evalFile.exists()) {
+				log.warn("skipping because eval file exists: " + evalFileName);
+				return;
+			}
+		}
+		BufferedWriter w = null;
+		try {
+
+			w = new BufferedWriter(new FileWriter(evalFileName));
+			evalProps.store(w, null);
+		} finally {
+			if (w != null)
+				w.close();
+		}
+	}
+
+	/**
+	 * generate command lines for semil
+	 * 
+	 * @return
+	 */
+	private List<String> generateSemilEvalLines() {
+		// cv.rbf.gammas
+		String gammas = props.getProperty("cv.rbf.gammas");
+		List<String> gammaOpts = null;
+		if (gammas != null && gammas.length() > 0) {
+			gammaOpts = Arrays
+					.asList(addOptionPrefix(gammas.split(","), "-g "));
+		}
+		// cv.semil.methods
+		List<String> methods = Arrays.asList(props.getProperty(
+				"cv.semil.methods", "").split(","));
+		// semil.line
+		List<String> semil = Arrays.asList(props.getProperty("cv.semil.line",
+				"").split(","));
+		return parameterGrid(semil, gammaOpts, methods);
+	}
+
+	private String[] addOptionPrefix(String[] args, String prefix) {
+		String[] options = new String[args.length];
+		for (int i = 0; i < args.length; i++) {
+			options[i] = prefix + args[i];
+		}
+		return options;
+	}
+
+	/**
+	 * recursively generate parameter grid
+	 * 
+	 * @param lines
+	 *            current lines
+	 * @param params
+	 *            variable number of List<String> arguments
+	 * @return
+	 */
+	private List<String> parameterGrid(List<String> lines, Object... params) {
+		List<String> newLines = new ArrayList<String>();
+		@SuppressWarnings("unchecked")
+		List<String> paramList = (List<String>) params[0];
+		if (paramList != null && paramList.size() > 0) {
+			// only iterate over the list if it is non-empty
+			for (String line : lines) {
+				for (String param : paramList) {
+					newLines.add(line + " " + param);
+				}
+			}
+		} else {
+			// else newLines = lines
+			newLines.addAll(lines);
+		}
+		if (params.length > 1) {
+			return parameterGrid(newLines,
+					Arrays.copyOfRange(params, 1, params.length));
+		} else {
+			return newLines;
+		}
+	}
+
+	private List<String> getSemilDistFilesForLabel(File labelFile,
+			File kernelDataDir) {
+		String labelFileName = labelFile.getName();
+		String label = FileUtil.parseLabelFromFileName(labelFileName);
+		Integer run = FileUtil.parseRunFromFileName(labelFileName);
+		Integer fold = FileUtil.parseFoldFromFileName(labelFileName);
+		File[] distFiles = null;
+		// check fold scope
+		if (fold != null) {
+			String filePrefix = FileUtil.getFoldFilePrefix(null, label, run,
+					fold) + "_dist_";
+			distFiles = kernelDataDir.listFiles(new FileUtil.PrefixFileFilter(
+					filePrefix));
+		}
+		// no matches, check label scope
+		if ((distFiles == null || distFiles.length == 0) && label != null) {
+			String filePrefix = FileUtil.getFoldFilePrefix(null, label, null,
+					null) + "_dist_";
+			distFiles = kernelDataDir.listFiles(new FileUtil.PrefixFileFilter(
+					filePrefix));
+		}
+		// no matches, check unscoped
+		if (distFiles == null || distFiles.length == 0) {
+			distFiles = kernelDataDir.listFiles(new FileUtil.PrefixFileFilter(
+					"dist_"));
+		}
+		if (distFiles != null && distFiles.length > 0) {
+			List<String> listDistFiles = new ArrayList<String>(distFiles.length);
+			for (File distFile : distFiles) {
+				listDistFiles.add(distFile.getName());
+			}
+			return listDistFiles;
+		} else {
+			log.warn("no dist files match label file: " + labelFile);
+			return null;
+		}
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationImporter.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationImporter.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationImporter.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationImporter.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,191 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * parse classifier evaluation results. expect input data files to classifier in
+ * working directory. expect output in dir option or subdirectories thereof.
+ * expect an options.properties in each directory that contains classifier
+ * output. See {@link ClassifierEvaluationParser.ParseOption} for a list of options in
+ * options.properties. You can override options via system properties (java -D
+ * options).
+ * 
+ * @author vijay
+ */
+public class ClassifierEvaluationImporter {
+	private static final Log log = LogFactory
+			.getLog(ClassifierEvaluationImporter.class);
+
+	private Map<String, ClassifierEvaluationParser> nameToParserMap;
+
+	public Map<String, ClassifierEvaluationParser> getNameToParserMap() {
+		return nameToParserMap;
+	}
+
+	public void setNameToParserMap(
+			Map<String, ClassifierEvaluationParser> nameToParserMap) {
+		this.nameToParserMap = nameToParserMap;
+	}
+
+	/**
+	 * define the command line options:
+	 * <ul>
+	 * <li>dir: results directory, optional
+	 * <li>type: parser type, optional. The option is not required so that the
+	 * documented default (libsvm, applied in getParser) can take effect.
+	 * </ul>
+	 * 
+	 * @return options understood by {@link #main(String[])}
+	 */
+	@SuppressWarnings("static-access")
+	private static Options initOptions() {
+		Options options = new Options();
+		options.addOption(OptionBuilder
+				.withArgName("cvDir")
+				.hasArg()
+				.withDescription(
+						"results directory, defaults to working directory")
+				.isRequired(false).create("dir"));
+		options.addOption(OptionBuilder.withArgName("type").hasArg()
+				.withDescription("libsvm (default) or svmlight or semil")
+				.isRequired(false).create("type"));
+		return options;
+	}
+
+	/**
+	 * @param args
+	 * @throws Exception
+	 */
+	public static void main(String[] args) throws Exception {
+		Options options = initOptions();
+		if (args.length == 0) {
+			printHelp(options);
+		} else {
+			CommandLineParser oparser = new GnuParser();
+			ClassifierEvaluationImporter importer = KernelContextHolder
+					.getApplicationContext().getBean(
+							ClassifierEvaluationImporter.class);
+			try {
+				CommandLine line = oparser.parse(options, args);
+				importer.importDirectory(line);
+			} catch (ParseException e) {
+				printHelp(options);
+				throw e;
+			}
+		}
+	}
+
+	private ClassifierEvaluationParser getParser(CommandLine line) {
+		String type = line.getOptionValue("type", "libsvm");
+		return this.nameToParserMap.get(type);
+	}
+
+	/**
+	 * Expect directory with subdirectories for each evaluation. Subdirectories
+	 * must contain following in order for results to be processed:
+	 * <ul>
+	 * <li>
+	 * model.txt: libsvm model trained on training set
+	 * <li>predict.txt: libsvm predictions on test set
+	 * <li>options.properties: libsvm command line options
+	 * </ul>
+	 * 
+	 * @param line
+	 *            parsed command line; the <tt>dir</tt> option (default ".")
+	 *            and <tt>type</tt> option (default "libsvm") are read
+	 * @throws IOException
+	 * @throws IllegalArgumentException
+	 *             if no parser is configured for the requested type
+	 */
+	public void importDirectory(CommandLine line) throws IOException {
+		ClassifierEvaluationParser lparser = getParser(line);
+		if (lparser == null) {
+			// unknown type: fail here with a usable message rather than with
+			// a NullPointerException while walking the directory tree
+			throw new IllegalArgumentException(
+					"no parser configured for type: "
+							+ line.getOptionValue("type", "libsvm"));
+		}
+		File directory = new File(line.getOptionValue("dir", "."));
+		importDirectory(directory, lparser);
+	}
+
+	/**
+	 * recursively import directory. We assume this directory contains
+	 * evaluation results if it has no subdirectories. Else we
+	 * look in subdirectories.
+	 * 
+	 * @param directory
+	 * @param lparser
+	 * @throws IOException
+	 */
+	public void importDirectory(File directory,
+			ClassifierEvaluationParser lparser) throws IOException {
+		File subdirs[] = directory.listFiles(new FileUtil.DirectoryFileFilter());
+		if(subdirs == null || subdirs.length == 0) {
+			// no subdirectories - assume this is a 'results' directory
+			try {
+				lparser.parseDirectory(new File("."), directory);
+			} catch (IOException ioe) {
+				log.error("error parsing directory: " + directory, ioe);
+			}
+		} else {
+			// look in subdirectories
+			for (File subdir : subdirs) {
+				importDirectory(subdir, lparser);
+			}
+		}
+		//
+		// if ("semil".equals(type) && checkFileRead(optionsFile)) {
+		// lparser.parseDirectory(new File("."), resultDir);
+		// } else if (("libsvm".equals(type) || "svmlight".equals(type))
+		// && checkFileRead(model) && checkFileRead(predict)
+		// && checkFileRead(optionsFile)) {
+		// String options = null;
+		// Double param1 = null;
+		// String param2 = null;
+		// InputStream isOptions = null;
+		// try {
+		// isOptions = new FileInputStream(optionsFile);
+		// Properties props = new Properties();
+		// props.load(isOptions);
+		// options = props.getProperty("kernel.eval.line");
+		// String strParam1 = props.getProperty("kernel.param1", null);
+		// if (strParam1 != null) {
+		// try {
+		// param1 = Double.parseDouble(strParam1);
+		// } catch (Exception e) {
+		// log.warn("error parasing param1: " + strParam1, e);
+		// }
+		// }
+		// param2 = props.getProperty("kernel.param2");
+		// } finally {
+		// isOptions.close();
+		// }
+		// if (options != null) {
+		// try {
+		// ClassifierEvaluation eval = lparser
+		// .parseClassifierEvaluation(line
+		// .getOptionValue("name"), line
+		// .getOptionValue("experiment"), line
+		// .getOptionValue("label"), options,
+		// predict, line.getOptionValue("test"),
+		// model, line
+		// .getOptionValue("instanceId"),
+		// output, "yes".equals(line
+		// .getOptionValue("storeProb",
+		// "no")));
+		// eval.setParam1(param1);
+		// eval.setParam2(param2);
+		// KernelContextHolder.getApplicationContext().getBean(
+		// ClassifierEvaluationDao.class)
+		// .saveClassifierEvaluation(eval,
+		// storeInstanceEval);
+		// } catch (Exception e) {
+		// // continue processing - don't give up because of one
+		// // bad file
+		// log.warn("error importing results, resultDir="
+		// + resultDir.getAbsolutePath(), e);
+		// }
+		// }
+		// }
+	}
+
+	/**
+	 * print command line usage.
+	 * 
+	 * @param options
+	 *            options to describe
+	 */
+	private static void printHelp(Options options) {
+		HelpFormatter formatter = new HelpFormatter();
+		// take the name from the class itself; the previous hard-coded string
+		// named a package (ytex.libsvm) this class is not in
+		formatter.printHelp(
+				"java " + ClassifierEvaluationImporter.class.getName() + "\n",
+				options);
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationParser.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationParser.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationParser.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationParser.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,73 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Entry point for parsing a directory of classifier evaluation data, together
+ * with the property keys (and their default values) that configure parsing.
+ */
+public interface ClassifierEvaluationParser {
+	/** the literal <tt>yes</tt>; used as default value of parse options */
+	public static final String YES = "yes";
+	/** the literal <tt>no</tt>; used as default value of parse options */
+	public static final String NO = "no";
+
+	/**
+	 * Property keys for various parse options. Each constant pairs the
+	 * property key with the default value to use when the key is not set; both
+	 * are fixed at construction.
+	 * 
+	 * @author vhacongarlav
+	 */
+	public enum ParseOption {
+		/**
+		 * key <tt>kernel.StoreInstanceEval</tt>.
+		 * 
+		 */
+		STORE_INSTANCE_EVAL("kernel.StoreInstanceEval", NO),
+		/**
+		 * key <tt>kernel.StoreProbabilities</tt>.
+		 */
+		STORE_PROBABILITIES("kernel.StoreProbabilities", NO),
+		/**
+		 * key <tt>kernel.StoreUnlabeled</tt>
+		 */
+		STORE_UNLABELED("kernel.StoreUnlabeled", NO),
+		/**
+		 * key <tt>kernel.StoreIRStats</tt>
+		 */
+		STORE_IRSTATS("kernel.StoreIRStats", YES),
+		/**
+		 * key <tt>kernel.data.basename</tt>
+		 * base name of file; other file names constructed relative to this.
+		 * label/fold/run taken from this file name. 
+		 */
+		DATA_BASENAME("kernel.data.basename", ""),
+//		/**
+//		 * key <tt>kernel.distance</tt>
+//		 * distance measure for semiL. default is euclidean.
+//		 */
+//		DISTANCE("kernel.distance", "euclidean"),
+//		/**
+//		 * key <tt>kernel.degree</tt>
+//		 * degree for knn graph for semiL. default is 10
+//		 */
+//		DEGREE("kernel.degree", "10"),
+		/**
+		 * key <tt>kernel.train.line</tt>
+		 * options used to train model. No default value (<tt>null</tt>).
+		 */
+		EVAL_LINE("kernel.train.line", null);
+
+		// enum constants are shared singletons - keep their state immutable.
+		// package visibility is retained for existing same-package readers.
+		final String optionKey;
+		final String defaultValue;
+
+		ParseOption(String optionKey, String defaultValue) {
+			this.optionKey = optionKey;
+			this.defaultValue = defaultValue;
+		}
+
+		/**
+		 * @return property key of this option
+		 */
+		public String getOptionKey() {
+			return optionKey;
+		}
+
+		/**
+		 * @return value to use if the property is not set; may be
+		 *         <tt>null</tt> (see {@link #EVAL_LINE})
+		 */
+		public String getDefaultValue() {
+			return defaultValue;
+		}
+	}
+
+	/**
+	 * Parse the contents of a directory.
+	 * 
+	 * @param dataDir
+	 *            directory to parse
+	 * @param outputDir
+	 *            NOTE(review): role is not visible from this interface -
+	 *            confirm against the implementations
+	 * @throws IOException
+	 *             declared for implementations that read/write files
+	 */
+	public void parseDirectory(File dataDir, File outputDir) throws IOException;
+
+}
\ No newline at end of file

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResult.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResult.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResult.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResult.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,41 @@
+package org.apache.ctakes.ytex.kernel;
+
+/**
+ * Simple value holder for the evaluation of a single instance: the instance
+ * id, its target class id, the predicted class id, and an optional array of
+ * probabilities.
+ */
+public class ClassifierEvaluationResult {
+	int instanceId;
+	int targetClassId;
+	int predictedClassId;
+	/** not initialized by this class; <tt>null</tt> until set */
+	double[] probabilities;
+
+	/** @return id of the instance */
+	public int getInstanceId() {
+		return this.instanceId;
+	}
+
+	/** @param id id of the instance */
+	public void setInstanceId(int id) {
+		this.instanceId = id;
+	}
+
+	/** @return id of the target class */
+	public int getTargetClassId() {
+		return this.targetClassId;
+	}
+
+	/** @param classId id of the target class */
+	public void setTargetClassId(int classId) {
+		this.targetClassId = classId;
+	}
+
+	/** @return id of the predicted class */
+	public int getPredictedClassId() {
+		return this.predictedClassId;
+	}
+
+	/** @param classId id of the predicted class */
+	public void setPredictedClassId(int classId) {
+		this.predictedClassId = classId;
+	}
+
+	/** @return the stored array itself (no copy), or <tt>null</tt> if unset */
+	public double[] getProbabilities() {
+		return this.probabilities;
+	}
+
+	/** @param values array to store; kept by reference (no copy) */
+	public void setProbabilities(double[] values) {
+		this.probabilities = values;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResults.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResults.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResults.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/ClassifierEvaluationResults.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,54 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.util.List;
+
+/**
+ * Holds a list of class ids and a list of per-instance
+ * {@link ClassifierEvaluationResult}s, and offers array views of the results.
+ */
+public class ClassifierEvaluationResults {
+	private List<Integer> classIds;
+	private List<ClassifierEvaluationResult> results;
+
+	public List<Integer> getClassIds() {
+		return this.classIds;
+	}
+
+	public void setClassIds(List<Integer> labels) {
+		this.classIds = labels;
+	}
+
+	public List<ClassifierEvaluationResult> getResults() {
+		return this.results;
+	}
+
+	public void setResults(List<ClassifierEvaluationResult> results) {
+		this.results = results;
+	}
+
+	/**
+	 * @return predicted class id of every result, in list order
+	 */
+	public int[] getPredictedClassIds() {
+		final List<ClassifierEvaluationResult> all = getResults();
+		final int[] predicted = new int[all.size()];
+		int pos = 0;
+		for (ClassifierEvaluationResult r : all) {
+			predicted[pos++] = r.getPredictedClassId();
+		}
+		return predicted;
+	}
+
+	/**
+	 * @return target class id of every result, in list order
+	 */
+	public int[] getTargetClassIds() {
+		final List<ClassifierEvaluationResult> all = getResults();
+		final int[] targets = new int[all.size()];
+		int pos = 0;
+		for (ClassifierEvaluationResult r : all) {
+			targets[pos++] = r.getTargetClassId();
+		}
+		return targets;
+	}
+
+	/**
+	 * @return the first entry of each result's probability array, in list
+	 *         order. Every result must have a non-empty probability array,
+	 *         otherwise this fails with a runtime exception.
+	 */
+	public double[] getProbabilities() {
+		final List<ClassifierEvaluationResult> all = getResults();
+		final double[] firstProbs = new double[all.size()];
+		int pos = 0;
+		for (ClassifierEvaluationResult r : all) {
+			firstProbs[pos++] = r.getProbabilities()[0];
+		}
+		return firstProbs;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelper.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelper.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelper.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelper.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,25 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
+
+
+/**
+ * Operations for exporting concept graph data (networks, subtrees).
+ */
+public interface CytoscapeHelper {
+
+	/**
+	 * Check the properties before running an export.
+	 * 
+	 * @param props
+	 *            properties that will be passed to the export methods
+	 * @return true if the properties are acceptable
+	 */
+	boolean validateProps(Properties props);
+
+	/**
+	 * Export a network to files whose names start with the given prefix.
+	 * 
+	 * @param filePrefix
+	 *            prefix of the files to create
+	 * @param props
+	 *            export parameters
+	 * @throws IOException
+	 */
+	void exportNetwork(String filePrefix, Properties props) throws IOException;
+
+	/**
+	 * Export the subtree of the specified concept.
+	 * 
+	 * @param conceptID
+	 *            id of the concept at the root of the subtree
+	 * @param props
+	 *            export parameters
+	 * @throws IOException
+	 */
+	void exportSubtree(String conceptID, Properties props) throws IOException;
+
+	/**
+	 * Export a network for an already loaded concept graph to the supplied
+	 * writers.
+	 * 
+	 * @param cg
+	 *            concept graph to export
+	 * @param corpusName
+	 * @param conceptGraphName
+	 * @param conceptSetName
+	 * @param leafChildrenDepth
+	 * @param networkData
+	 *            receives the network data
+	 * @param nodeData
+	 *            receives the node data
+	 * @throws IOException
+	 */
+	void exportNetwork(ConceptGraph cg, String corpusName,
+			String conceptGraphName, String conceptSetName,
+			int leafChildrenDepth, BufferedWriter networkData,
+			BufferedWriter nodeData) throws IOException;
+
+}
\ No newline at end of file

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelperImpl.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelperImpl.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelperImpl.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/CytoscapeHelperImpl.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,262 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.OptionGroup;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
+import org.apache.ctakes.ytex.kernel.dao.ConceptDao;
+import org.apache.ctakes.ytex.kernel.model.ConcRel;
+import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
+import org.apache.ctakes.ytex.umls.dao.UMLSDao;
+
+
+/**
+ * Generates graphs and node lists for cytoscape from a concept graph: an edge
+ * list (<tt>prefix.sif</tt>), a node data file (<tt>prefix.node.txt</tt>) and
+ * id lists of concept subtrees (<tt>conceptID.idlist</tt>). Can be run from
+ * the command line, see {@link #main(String[])}.
+ */
+public class CytoscapeHelperImpl implements CytoscapeHelper {
+	/** maximum number of concept ids handed to the umls dao in one call */
+	private static final int NODE_CHUNK_SIZE = 1000;
+
+	/**
+	 * Command line entry point. Options: <tt>-prop</tt> (property file) and
+	 * exactly one of <tt>-network</tt> (file prefix) or <tt>-subtree</tt>
+	 * (concept id). Usage is printed if the command line cannot be parsed or
+	 * the properties do not pass {@link #validateProps(Properties)}.
+	 * 
+	 * @param args
+	 */
+	@SuppressWarnings("static-access")
+	public static void main(String args[]) throws ParseException, IOException {
+		Options options = new Options();
+		options.addOption(OptionBuilder
+				.withArgName("prop")
+				.hasArg()
+				.withDescription(
+						"property file with queries and other parameters. todo desc")
+				.create("prop"));
+		OptionGroup og = new OptionGroup();
+		og.addOption(OptionBuilder
+				.withArgName("network")
+				.hasArg()
+				.withDescription(
+						"create network using specified concept graph and corpus. creates prefix.sif with edges and prefix.node.txt with node data in working directory.")
+				.create("network"));
+		// description names the file exportSubtree really writes (.idlist)
+		og.addOption(OptionBuilder
+				.withArgName("concept id")
+				.hasArg()
+				.withDescription(
+						"get all descendants of specified concept, creates concept_id.idlist file in working directory")
+				.create("subtree"));
+		og.setRequired(true);
+		options.addOptionGroup(og);
+		try {
+			CommandLineParser parser = new GnuParser();
+			CommandLine line = parser.parse(options, args);
+			CytoscapeHelper cytHelper = KernelContextHolder
+					.getApplicationContext().getBean(CytoscapeHelper.class);
+			Properties props = new Properties(System.getProperties());
+			// systemOverride=true: loadProperties returns a non-null result
+			// even if no property file was specified
+			props.putAll(FileUtil.loadProperties(
+					line.getOptionValue("prop"), true));
+			if (!cytHelper.validateProps(props)) {
+				printHelp(options);
+			} else {
+				if (line.hasOption("network")) {
+					cytHelper.exportNetwork(line.getOptionValue("network"),
+							props);
+				} else if (line.hasOption("subtree")) {
+					cytHelper.exportSubtree(line.getOptionValue("subtree"),
+							props);
+				} else {
+					printHelp(options);
+				}
+			}
+		} catch (ParseException pe) {
+			printHelp(options);
+		}
+	}
+
+	/**
+	 * print command line usage
+	 * 
+	 * @param options
+	 */
+	private static void printHelp(Options options) {
+		HelpFormatter formatter = new HelpFormatter();
+		formatter.printHelp("java " + CytoscapeHelperImpl.class.getName()
+				+ " generate graphs and node lists for cytoscape", options);
+	}
+
+	protected ClassifierEvaluationDao classifierEvaluationDao;
+
+	protected ConceptDao conceptDao;
+
+	protected UMLSDao umlsDao;
+
+	/**
+	 * Add the specified concept and, recursively, all its ancestors to
+	 * nodesToInclude. Every concept encountered as a parent is removed from
+	 * the set of candidate leaves.
+	 * 
+	 * @param cg
+	 *            concept graph
+	 * @param conceptId
+	 *            concept to add; ignored if not in the concept graph
+	 * @param nodesToInclude
+	 *            concepts collected so far; modified
+	 * @param leaves
+	 *            candidate leaves; modified
+	 */
+	private void addConcepts(ConceptGraph cg, String conceptId,
+			Set<String> nodesToInclude, Set<String> leaves) {
+		ConcRel cr = cg.getConceptMap().get(conceptId);
+		if (cr == null) {
+			// unknown concept - nothing to add (same policy as exportSubtree)
+			return;
+		}
+		// only process this node if it isn't already in the list
+		if (nodesToInclude.add(cr.getConceptID())) {
+			// iterate over parents and recurse
+			for (ConcRel crp : cr.getParents()) {
+				addConcepts(cg, crp.getConceptID(), nodesToInclude, leaves);
+				// parent is not a leaf - remove it from the list of candidate
+				// leaves
+				leaves.remove(crp.getConceptID());
+			}
+		}
+	}
+
+	/**
+	 * Add the concept and, recursively, all its descendants to nodes.
+	 * Concepts already in nodes are not visited again.
+	 * 
+	 * @param nodes
+	 *            concept ids collected so far; modified
+	 * @param cr
+	 *            root of the subtree
+	 */
+	private void addSubtree(Set<String> nodes, ConcRel cr) {
+		if (nodes.add(cr.getConceptID())) {
+			for (ConcRel crc : cr.getChildren()) {
+				addSubtree(nodes, crc);
+			}
+		}
+	}
+
+	/**
+	 * Write one line <tt>child[tab]isa[tab]parent</tt> for every parent-child
+	 * relationship of the concept graph.
+	 * 
+	 * @param cg
+	 *            concept graph
+	 * @param network
+	 *            receives the edges
+	 * @throws IOException
+	 */
+	private void exportEdges(ConceptGraph cg, BufferedWriter network) throws IOException {
+		for (ConcRel cr : cg.getConceptList()) {
+			for (ConcRel crc : cr.getChildren()) {
+				network.write(crc.getConceptID());
+				network.write("\tisa\t");
+				network.write(cr.getConceptID());
+				network.write("\n");
+			}
+		}
+	}
+
+	/**
+	 * Writes the edges of the concept graph to networkData. The remaining
+	 * parameters (including nodeData) are currently not used.
+	 * 
+	 * @see
+	 * org.apache.ctakes.ytex.kernel.CytoscapeHelper#exportNetwork(org.apache.ctakes.ytex.kernel.model.ConceptGraph,
+	 * java.lang.String, java.lang.String, java.lang.String, int,
+	 * java.io.BufferedWriter, java.io.BufferedWriter)
+	 */
+	@Override
+	public void exportNetwork(ConceptGraph cg, String corpusName,
+			String conceptGraphName, String conceptSetName,
+			int leafChildrenDepth, BufferedWriter networkData,
+			BufferedWriter nodeData) throws IOException {
+		exportEdges(cg, networkData);
+	}
+
+	/**
+	 * Create <tt>filePrefix.sif</tt> and <tt>filePrefix.node.txt</tt> and
+	 * export the concept graph named by property
+	 * <tt>org.apache.ctakes.ytex.conceptGraphName</tt> to them.
+	 * 
+	 * @param filePrefix
+	 *            prefix of the files to create
+	 * @param props
+	 *            supplies concept graph name, corpus name and concept set name
+	 * @throws IOException
+	 */
+	@Override
+	public void exportNetwork(String filePrefix, Properties props)
+			throws IOException {
+		BufferedWriter networkData = null;
+		BufferedWriter nodeData = null;
+		try {
+			networkData = new BufferedWriter(
+					new FileWriter(filePrefix + ".sif"));
+			nodeData = new BufferedWriter(new FileWriter(filePrefix
+					+ ".node.txt"));
+			String conceptGraphName = props
+					.getProperty("org.apache.ctakes.ytex.conceptGraphName");
+			exportNetwork(this.conceptDao.getConceptGraph(conceptGraphName),
+					props.getProperty("org.apache.ctakes.ytex.corpusName"), conceptGraphName,
+					props.getProperty("org.apache.ctakes.ytex.conceptSetName"), 0, networkData,
+					nodeData);
+		} finally {
+			// nested finally: nodeData must be closed even if closing
+			// networkData fails
+			try {
+				if (networkData != null) {
+					networkData.close();
+				}
+			} finally {
+				if (nodeData != null) {
+					nodeData.close();
+				}
+			}
+		}
+	}
+
+	/**
+	 * Write one line <tt>conceptID[tab]ic[tab]"name"</tt> per concept. The ic
+	 * defaults to 0.0 and the name to the empty string if not available.
+	 * 
+	 * @param subList
+	 *            concept ids to export; names are looked up in a single call
+	 *            to the umls dao
+	 * @param nodeData
+	 *            receives the node data
+	 * @param ic
+	 *            concept id to value map
+	 * @throws IOException
+	 */
+	private void exportNodes(List<String> subList, BufferedWriter nodeData,
+			Map<String, Double> ic) throws IOException {
+		Map<String, String> nodeNames = this.umlsDao.getNames(subList);
+		for (String conceptID : subList) {
+			Double conceptIC = ic.get(conceptID);
+			String name = nodeNames.get(conceptID);
+			nodeData.write(conceptID);
+			nodeData.write("\t");
+			nodeData.write(Double.toString(conceptIC != null ? conceptIC
+					: 0.0));
+			nodeData.write("\t\"");
+			nodeData.write(name != null ? name : "");
+			nodeData.write("\"\n");
+		}
+
+	}
+
+	/**
+	 * Export all nodes, looking up names in chunks of at most
+	 * {@value #NODE_CHUNK_SIZE} concepts.
+	 * 
+	 * @param exportedNodes
+	 *            concept ids to export
+	 * @param nodeData
+	 *            receives the node data
+	 * @param ic
+	 *            concept id to value map
+	 * @throws IOException
+	 */
+	private void exportNodes(Set<String> exportedNodes,
+			BufferedWriter nodeData, Map<String, Double> ic) throws IOException {
+		List<String> exportedNodeList = new ArrayList<String>(exportedNodes);
+		int size = exportedNodeList.size();
+		for (int start = 0; start < size; start += NODE_CHUNK_SIZE) {
+			// subList's end index is exclusive - passing the index of the
+			// last element would drop one node per chunk
+			int end = Math.min(size, start + NODE_CHUNK_SIZE);
+			exportNodes(exportedNodeList.subList(start, end), nodeData, ic);
+		}
+	}
+
+	/**
+	 * Write the ids of the specified concept and all its descendants to
+	 * <tt>conceptID.idlist</tt>, one id per line. If the concept is not in the
+	 * concept graph, the file is created empty.
+	 * 
+	 * @param conceptID
+	 *            root of the subtree
+	 * @param props
+	 *            supplies <tt>org.apache.ctakes.ytex.conceptGraphName</tt>
+	 * @throws IOException
+	 */
+	@Override
+	public void exportSubtree(String conceptID, Properties props)
+			throws IOException {
+		Set<String> nodes = new HashSet<String>();
+		ConceptGraph cg = this.conceptDao.getConceptGraph(props
+				.getProperty("org.apache.ctakes.ytex.conceptGraphName"));
+		ConcRel cr = cg.getConceptMap().get(conceptID);
+		if (cr != null) {
+			addSubtree(nodes, cr);
+		}
+		BufferedWriter w = null;
+		try {
+			w = new BufferedWriter(new FileWriter(conceptID + ".idlist"));
+			for (String node : nodes) {
+				w.write(node);
+				w.write("\n");
+			}
+		} finally {
+			if (w != null)
+				w.close();
+		}
+
+	}
+
+	public ClassifierEvaluationDao getClassifierEvaluationDao() {
+		return classifierEvaluationDao;
+	}
+
+
+	public ConceptDao getConceptDao() {
+		return conceptDao;
+	}
+
+	public UMLSDao getUmlsDao() {
+		return umlsDao;
+	}
+
+
+	public void setClassifierEvaluationDao(
+			ClassifierEvaluationDao classifierEvaluationDao) {
+		this.classifierEvaluationDao = classifierEvaluationDao;
+	}
+
+	public void setConceptDao(ConceptDao conceptDao) {
+		this.conceptDao = conceptDao;
+	}
+
+	public void setUmlsDao(UMLSDao umlsDao) {
+		this.umlsDao = umlsDao;
+	}
+
+	/**
+	 * @return true if property <tt>org.apache.ctakes.ytex.conceptGraphName</tt>
+	 *         is set
+	 */
+	@Override
+	public boolean validateProps(Properties props) {
+//		String corpusName = props.getProperty("org.apache.ctakes.ytex.corpusName");
+		String conceptGraphName = props.getProperty("org.apache.ctakes.ytex.conceptGraphName");
+		return conceptGraphName != null;
+	}
+	
+}

Added: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/FileUtil.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/FileUtil.java?rev=1551254&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/FileUtil.java (added)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/FileUtil.java Mon Dec 16 16:30:30 2013
@@ -0,0 +1,297 @@
+package org.apache.ctakes.ytex.kernel;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * miscellaneous utility functions used for data import/export
+ * 
+ * @author vijay
+ * 
+ */
+public class FileUtil {
+	/** matches <tt>fold[n]_</tt>; group 1 is the fold number */
+	static final Pattern pFold = Pattern.compile("fold(\\d+)_");
+	/** matches <tt>run[n]_</tt>; group 1 is the run number */
+	static final Pattern pRun = Pattern.compile("run(\\d+)_");
+	/** matches <tt>label[label]_</tt>; group 1 is the label */
+	static final Pattern pLabel = Pattern.compile("label([^_]+)_");
+
+	/**
+	 * extract fold from file name produced by file util
+	 * 
+	 * @param filename
+	 * @return fold number, 0 if not in file name
+	 */
+	public static int parseFoldFromFileName(String filename) {
+		Matcher m = pFold.matcher(filename);
+		return m.find() ? Integer.parseInt(m.group(1)) : 0;
+	}
+
+	/**
+	 * extract run from file name produced by file util
+	 * 
+	 * @param filename
+	 * @return run number, 0 (never null) if not in file name
+	 */
+	public static Integer parseRunFromFileName(String filename) {
+		Matcher m = pRun.matcher(filename);
+		return m.find() ? Integer.parseInt(m.group(1)) : 0;
+	}
+
+	/**
+	 * extract label from file name produced by file util
+	 * 
+	 * @param filename
+	 * @return null if not in file name
+	 */
+	public static String parseLabelFromFileName(String filename) {
+		Matcher m = pLabel.matcher(filename);
+		return m.find() ? m.group(1) : null;
+	}
+
+	/**
+	 * @return true if the value is non-null and greater than 0, i.e. it should
+	 *         show up in generated file names
+	 */
+	private static boolean isPositive(Integer value) {
+		return value != null && value > 0;
+	}
+
+	/**
+	 * append outdir to the builder, followed by the file separator unless
+	 * outdir already ends with a slash or backslash. Does nothing if outdir is
+	 * null or empty.
+	 */
+	private static void appendOutdir(StringBuilder builder, String outdir) {
+		if (outdir != null && outdir.length() > 0) {
+			builder.append(outdir);
+			if (!outdir.endsWith("/") && !outdir.endsWith("\\"))
+				builder.append(File.separator);
+		}
+	}
+
+	/**
+	 * construct file name with label, run, fold with format
+	 * <tt>label[label]_run[run]_fold[fold]</tt> only put in the non-null
+	 * pieces. The label is skipped if empty, run and fold are skipped if not
+	 * greater than 0. No trailing underscore is appended.
+	 * 
+	 * @param outdir
+	 *            prepended to the file name if not null/empty
+	 * @param label
+	 * @param run
+	 * @param fold
+	 * @return outdir plus file name prefix; empty string if nothing is set
+	 */
+	public static String getFoldFilePrefix(String outdir, String label,
+			Integer run, Integer fold) {
+		StringBuilder builder = new StringBuilder();
+		appendOutdir(builder, outdir);
+		boolean hasRun = isPositive(run);
+		boolean hasFold = isPositive(fold);
+		if (label != null && label.length() > 0) {
+			builder.append("label").append(label);
+			if (hasRun || hasFold)
+				builder.append("_");
+		}
+		if (hasRun) {
+			builder.append("run").append(Integer.toString(run));
+			if (hasFold)
+				builder.append("_");
+		}
+		if (hasFold) {
+			builder.append("fold").append(Integer.toString(fold));
+		}
+		return builder.toString();
+	}
+
+	/**
+	 * generate file name for given outdir and 'scope'. The suffix is
+	 * separated from the prefix by an underscore, unless the prefix is empty
+	 * or ends with a slash, backslash or dot.
+	 * 
+	 * @param outdir
+	 *            see {@link #getFoldFilePrefix}
+	 * @param label
+	 *            see {@link #getFoldFilePrefix}
+	 * @param run
+	 *            see {@link #getFoldFilePrefix}
+	 * @param fold
+	 *            see {@link #getFoldFilePrefix}
+	 * @param suffix
+	 *            added to file
+	 * @return prefix plus suffix
+	 */
+	public static String getScopedFileName(String outdir, String label,
+			Integer run, Integer fold, String suffix) {
+		String filename = FileUtil.getFoldFilePrefix(outdir, label, run, fold);
+		if (filename.length() > 0 && !filename.endsWith("/")
+				&& !filename.endsWith("\\") && !filename.endsWith("."))
+			filename += "_";
+		filename += suffix;
+		return filename;
+	}
+
+	/**
+	 * prepend the directory to the file name
+	 * 
+	 * @param outdir
+	 *            directory; may be null or empty
+	 * @param filename
+	 * @return outdir, file separator (if outdir does not end with a slash or
+	 *         backslash), and filename
+	 */
+	public static String addFilenameToDir(String outdir, String filename) {
+		StringBuilder builder = new StringBuilder();
+		appendOutdir(builder, outdir);
+		builder.append(filename);
+		return builder.toString();
+	}
+
+	/**
+	 * construct file name for train/test set, will be like
+	 * <tt>label[label]_run[run]_fold[fold]_train</tt>
+	 * 
+	 * @param outdir
+	 * @param label
+	 * @param run
+	 * @param fold
+	 * @param train
+	 *            true: append <tt>train</tt>, false: append <tt>test</tt>,
+	 *            null: append nothing (the separating underscore is still
+	 *            added if label, run or fold are set)
+	 * @return
+	 */
+	public static String getDataFilePrefix(String outdir, String label,
+			Integer run, Integer fold, Boolean train) {
+		StringBuilder builder = new StringBuilder(getFoldFilePrefix(outdir,
+				label, run, fold));
+		if ((label != null && label.length() > 0) || isPositive(run)
+				|| isPositive(fold))
+			builder.append("_");
+		if (train != null) {
+			builder.append(train.booleanValue() ? "train" : "test");
+		}
+		return builder.toString();
+	}
+
+	/**
+	 * create the directory (including parents) if it does not exist. Does
+	 * nothing if outdir is null or empty.
+	 * 
+	 * @param outdir
+	 * @throws IOException
+	 *             if outdir exists but is not a directory, or could not be
+	 *             created
+	 */
+	public static void createOutdir(String outdir) throws IOException {
+		if (outdir != null && outdir.length() > 0) {
+			File outdirF = new File(outdir);
+			if (outdirF.exists()) {
+				if (!outdirF.isDirectory()) {
+					throw new IOException(
+							"outdir exists but is not a directory " + outdir);
+				}
+			} else {
+				if (!outdirF.mkdirs()) {
+					throw new IOException("could not create directory: "
+							+ outdir);
+				}
+			}
+		}
+
+	}
+
+	/**
+	 * @param file
+	 *            path of the file
+	 * @return true if the file can be read
+	 */
+	public static boolean checkFileRead(String file) {
+		return (new File(file)).canRead();
+	}
+
+	/**
+	 * file filter to get directories
+	 * 
+	 * @author vijay
+	 * 
+	 */
+	public static class DirectoryFileFilter implements FileFilter {
+		@Override
+		public boolean accept(File pathname) {
+			return pathname.isDirectory();
+		}
+	}
+
+	/**
+	 * get files that start with specified prefix. just the file name, not
+	 * preceding directories, are checked.
+	 * 
+	 * @author vijay
+	 * 
+	 */
+	public static class PrefixFileFilter implements FileFilter {
+		String prefix = null;
+
+		public PrefixFileFilter(String prefix) {
+			this.prefix = prefix;
+		}
+
+		@Override
+		public boolean accept(File pathname) {
+			return pathname.getName().startsWith(prefix);
+		}
+
+	}
+
+	/**
+	 * filter files by suffix. just the file name is checked.
+	 * 
+	 * @author vijay
+	 * 
+	 */
+	public static class SuffixFileFilter implements FileFilter {
+		String suffix = null;
+
+		public SuffixFileFilter(String suffix) {
+			this.suffix = suffix;
+		}
+
+		@Override
+		public boolean accept(File pathname) {
+			return pathname.getName().endsWith(suffix);
+		}
+
+	}
+
+	/**
+	 * load properties from a file, optionally overlaid with the system
+	 * properties. Files with extension <tt>.xml</tt> are read in xml
+	 * properties format.
+	 * 
+	 * @param fileName
+	 *            property file; may be null/empty. A file that does not exist
+	 *            is silently skipped.
+	 * @param systemOverride
+	 *            if true, the system properties are added on top of (and
+	 *            override) the properties from the file
+	 * @return the properties; null if no file was loaded and systemOverride is
+	 *         false
+	 * @throws IOException
+	 *             if the file exists but could not be read
+	 */
+	public static Properties loadProperties(String fileName,
+			boolean systemOverride) throws IOException {
+		Properties kernelProps = new Properties();
+		InputStream is = null;
+		boolean propsLoaded = false;
+		if (fileName != null && fileName.length() > 0) {
+			try {
+				is = new BufferedInputStream(new FileInputStream(fileName));
+				if (fileName.endsWith(".xml"))
+					kernelProps.loadFromXML(is);
+				else
+					kernelProps.load(is);
+				propsLoaded = true;
+			} catch (FileNotFoundException fe) {
+				// do nothing - options not required
+			} finally {
+				if (is != null)
+					is.close();
+			}
+		}
+		if (systemOverride) {
+			kernelProps.putAll(System.getProperties());
+			propsLoaded = true;
+		}
+		if (propsLoaded)
+			return kernelProps;
+		else
+			return null;
+	}
+
+	/**
+	 * get property as double
+	 * 
+	 * @param props
+	 * @param propKey
+	 * @param defaultProp
+	 * @return property value; defaultProp if the property is not set, empty,
+	 *         or not a number
+	 */
+	public static Double getDoubleProperty(Properties props, String propKey,
+			Double defaultProp) {
+		Double propValue = null;
+		String propStr = props.getProperty(propKey);
+		if (propStr != null && propStr.length() > 0) {
+			try {
+				propValue = Double.parseDouble(propStr);
+			} catch (NumberFormatException ignored) {
+				// deliberately ignored - fall back to the default
+			}
+		}
+		return propValue != null ? propValue : defaultProp;
+	}
+
+	/**
+	 * get property as integer
+	 * 
+	 * @param props
+	 * @param propKey
+	 * @param defaultProp
+	 * @return property value; defaultProp if the property is not set, empty,
+	 *         or not an integer
+	 */
+	public static Integer getIntegerProperty(Properties props, String propKey,
+			Integer defaultProp) {
+		Integer propValue = null;
+		String propStr = props.getProperty(propKey);
+		if (propStr != null && propStr.length() > 0) {
+			try {
+				propValue = Integer.parseInt(propStr);
+			} catch (NumberFormatException ignored) {
+				// deliberately ignored - fall back to the default
+			}
+		}
+		return propValue != null ? propValue : defaultProp;
+	}
+
+}



Mime
View raw message