Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id EC1FE10F26 for ; Sun, 7 Jul 2013 19:23:43 +0000 (UTC) Received: (qmail 41638 invoked by uid 500); 7 Jul 2013 19:23:43 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 41613 invoked by uid 500); 7 Jul 2013 19:23:43 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 41606 invoked by uid 99); 7 Jul 2013 19:23:43 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 07 Jul 2013 19:23:43 +0000 X-ASF-Spam-Status: No, hits=-1998.0 required=5.0 tests=ALL_TRUSTED,FB_GET_MEDS X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 07 Jul 2013 19:23:32 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id EB95C23889EC; Sun, 7 Jul 2013 19:23:09 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1500511 [2/6] - in /ctakes/sandbox/ctakes-scrubber-deid/src: ./ main/ main/java/ main/java/org/ main/java/org/apache/ main/java/org/apache/uima/ main/java/org/apache/uima/examples/ main/java/org/spin/ main/java/org/spin/scrubber/ main/java... 
Date: Sun, 07 Jul 2013 19:23:07 -0000 To: commits@ctakes.apache.org From: brittfitch@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20130707192309.EB95C23889EC@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorI2B2.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorI2B2.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorI2B2.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorI2B2.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,211 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ ******************************************************************************/ +package org.spin.scrubber.classification; + +import java.io.File; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathFactory; + +import org.apache.log4j.Logger; +import org.spin.scrubber.uima.dao.HumanAnnotationsDAO; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +/** + * Class for extracting different types of PHI tags out of annotated i2b2 smoking data. + * Writes to the database + * tables: human_annotations + * + * @author britt fitch bf19 + * + */ +public class HumanAnnotationsExtractorI2B2 implements HumanAnnotationsExtractor +{ + private static Logger log = Logger.getLogger(HumanAnnotationsExtractorI2B2.class); + + protected String tableSuffix; + private File dirInputHumanAnnotations; + + public HumanAnnotationsExtractorI2B2(String dirInputHumanAnnotations, String tableSuffix) + { + this(new File(dirInputHumanAnnotations), tableSuffix); + } + + public HumanAnnotationsExtractorI2B2(File dirInputHumanAnnotations, String tableSuffix) + { + this.dirInputHumanAnnotations = dirInputHumanAnnotations; + this.tableSuffix = tableSuffix; + + log.info("Starting Human Annotations Extractor (I2B2) @ "+ dirInputHumanAnnotations.getAbsolutePath()); + } + + /** + * @param args + * @throws Exception + */ + public static void main(String[] args) throws Exception + { + if(args.length!=2) + { + System.out.println("USAGE:\t\t HumanAnnotationsExtractorI2B2 input_directory table_suffix"); + System.out.println("EXAMPLE:\t HumanAnnotationsExtractorI2B2 ../data/ _test"); + } + else + { + HumanAnnotationsExtractorI2B2 runner= new HumanAnnotationsExtractorI2B2(args[0], args[1]); + 
runner.parseHumanAnnotations(); + } + } + + /** + * Parse XML such that the "real" absolute character positions can be obtained from the input XML + */ + public void parseHumanAnnotations() + { + HumanAnnotationsDAO dao = null; + + try + { + dao = new HumanAnnotationsDAO(tableSuffix); + + log.debug("Input path "+dirInputHumanAnnotations.getAbsolutePath()); + + if (!dirInputHumanAnnotations.exists()) + { + dirInputHumanAnnotations.createNewFile(); + } + + File[] files = dirInputHumanAnnotations.listFiles(); + + for (File f : files) + { + if (f.isDirectory()) + { + continue; + } + + log.debug("Reading: " + f.getName()); + + //read infile + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(f); + + //START iterate over dom, update phi tags with start&end attribs. + Element root = doc.getDocumentElement(); + XPathFactory xPathfactory1 = XPathFactory.newInstance(); + XPath xpath1 = xPathfactory1.newXPath(); + XPathExpression expr1 = xpath1.compile("//TEXT"); + + NodeList nodes1 = (NodeList) expr1.evaluate(doc, XPathConstants.NODESET); + for (int n1int=0; n1int0 && !token.equals(",")) + { + dao.insert(id, type, token, startIdx, endIdx); + } + } + } + } + } + catch (Exception e) + { + log.error("Failed to parse human annotations from i2b2 input", e) ; + } + finally + { + dao.close(); + } + } + +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorI2B2.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtege.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtege.java?rev=1500511&view=auto 
============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtege.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtege.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,279 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ ******************************************************************************/ +package org.spin.scrubber.classification; + +import org.apache.log4j.Logger; +import org.spin.scrubber.uima.dao.HumanAnnotationsDAO; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathFactory; +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * class for extracting different types of PHI tags out of protege/knowtator annotated data. + * + * Writes to the database + * tables: + * human_annotations_test + * human_annotations_train + * + * @author britt fitch bf19 + * + */ +public class HumanAnnotationsExtractorProtege implements HumanAnnotationsExtractor +{ + private static Logger log = Logger.getLogger(HumanAnnotationsExtractorProtege.class); + protected String tableSuffix; + private File dirInputHumanAnnotations; + + public HumanAnnotationsExtractorProtege(String dirInputHumanAnnotations, String tableSuffix) + { + this(new File(dirInputHumanAnnotations), tableSuffix); + } + + public HumanAnnotationsExtractorProtege(File dirInputHumanAnnotations, String tableSuffix) + { + this.dirInputHumanAnnotations = dirInputHumanAnnotations; + this.tableSuffix = tableSuffix; + + log.info("Starting Human Annotations Extractor (Protege) @ "+ dirInputHumanAnnotations.getAbsolutePath()); + } + + /** + * @param args + * @throws Exception + */ + public static void main(String[] args) throws Exception + { + if(args.length!=2) + { + System.out.println("USAGE:\t\t HumanAnnotationsExtractorProtege input_directory {_test|_train}"); + } + else + { + HumanAnnotationsExtractorProtege runner = new HumanAnnotationsExtractorProtege(args[0], args[1]); + 
runner.parseHumanAnnotations(); + } + } + + public void parseHumanAnnotations() + { + log.info("BEGIN Parsing human annotations."); + + HumanAnnotationsDAO dao = null; + try + { + dao = new HumanAnnotationsDAO(tableSuffix); + + log.debug("Input path "+dirInputHumanAnnotations.getAbsolutePath()); + + if (!dirInputHumanAnnotations.exists()) + { + dirInputHumanAnnotations.createNewFile(); + } + + File[] files = dirInputHumanAnnotations.listFiles(); + + if(files==null || files.length==0) + { + log.warn("There were no human annotations in dir: "+ dirInputHumanAnnotations.getAbsolutePath()); + } + + for (File f : files) + { + if (f.isDirectory()) + { + continue; + } + + log.debug("Reading: " + f.getName()); + + //read infile + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(f); + + //continue on and parse PHI tags with the start & end dates. + XPathFactory xPathfactory = XPathFactory.newInstance(); + XPath xpath = xPathfactory.newXPath(); + + List annotationList = new ArrayList(); + Map classMentionMap = new HashMap(); + + //read all matching nodes + XPathExpression annotExpr = xpath.compile("//annotation"); + NodeList nodes = (NodeList) annotExpr.evaluate(doc, XPathConstants.NODESET); + + //for each ANNOTATION node in a file + for (int i=0; i0 && !token.equals(",")) + { + dao.insert(a.getFilenameShort(), classMentionMap.get(a.getMentionId()).toUpperCase(), token, a.getStartIdx(), a.getEndIdx()); + } + } + } + } + + log.info("DONE Parsing human annotations."); + } + catch (Exception e) + { + log.error("Could not parse human annotations", e); + } + finally + { + dao.close(); + } + } + + + private class KnowtatorAnnot + { + private String token; + private String filenameShort; + private int startIdx; + private int endIdx; + private String mentionClass; + private String mentionId; + + public String getToken() + { + return token; + } + + public void setToken(String 
token) + { + this.token = token; + } + + public String getFilenameShort() + { + return filenameShort; + } + public void setFilenameShort(String filenameShort) + { + this.filenameShort = filenameShort; + } + public int getStartIdx() + { + return startIdx; + } + public void setStartIdx(int startIdx) + { + this.startIdx = startIdx; + } + public int getEndIdx() + { + return endIdx; + } + public void setEndIdx(int endIdx) + { + this.endIdx = endIdx; + } + public String getMentionClass() + { + return mentionClass; + } + public void setMentionClass(String mentionClass) + { + this.mentionClass = mentionClass; + } + public String getMentionId() + { + return mentionId; + } + public void setMentionId(String mentionId) + { + this.mentionId = mentionId; + } + } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/HumanAnnotationsExtractorProtege.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaClassifier.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaClassifier.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaClassifier.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaClassifier.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,276 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +package org.spin.scrubber.classification; + +import org.spin.scrubber.ScrubberProperties; +import org.spin.scrubber.uima.dao.FeatureMatrixDAO; +import weka.classifiers.Classifier; +import weka.classifiers.Evaluation; +import weka.classifiers.meta.CostSensitiveClassifier; +import weka.core.Instances; +import weka.core.Utils; +import weka.core.converters.ConverterUtils.DataSource; +import weka.filters.Filter; +import weka.filters.unsupervised.attribute.Remove; + +import java.util.ArrayList; +import java.util.List; + +/** + * TODO: Serious refactoring needed, only use what we need for the implementation we published. (britt) + */ + +public class WekaClassifier +{ + private String testModelFilepath = null; + private String trainModelFilepath =null; + + private Remove removeFilter = null; + private Classifier classifier = null; + + private String tableSuffix = "_test"; //WekaClassifier only updates "test" tables. 
+ + public WekaClassifier() + { + this(ScrubberProperties.getFileModelTrainAbsolutePath(), ScrubberProperties.getFileModelTestAbsolutePath()); + } + + public WekaClassifier(String trainModelFilepath, String testModelFilepath) + { + this.trainModelFilepath = trainModelFilepath; + this.testModelFilepath = testModelFilepath; + } + + public static void main(String[] args) throws Exception + { + WekaClassifier wc = new WekaClassifier(); + wc.test(); + } + + public void test() throws Exception + { + //get data + DataSource trainSource = new DataSource(getTrainModelFilepath()); + DataSource testSource = new DataSource(getTestModelFilepath()); + Instances trainData = trainSource.getDataSet(); + Instances testData = testSource.getDataSet(); + Instances orig = new Instances(testData); + + //remove filter + trainData = Filter.useFilter(trainData, getRemoveFilter(trainData)); + testData = Filter.useFilter(testData, getRemoveFilter(trainData)); + + //set class index + trainData.setClassIndex(trainData.numAttributes()-1); + System.out.println("class index: " + trainData.classIndex() +"\t"+ trainData.attribute(trainData.classIndex())); + + testData.setClassIndex(testData.numAttributes()-1); + System.out.println("class index: " + testData.classIndex() +"\t"+ testData.attribute(testData.classIndex())); + + //check headers + if (!trainData.equalHeaders(testData)) + { + System.out.println(); + throw new IllegalStateException("Incompatible train and test set!"); + } + else + { + System.out.println("headers match..."); + } + + //build classifier + System.out.println("building classifier..."); + Classifier base = getClassifier(); + base.buildClassifier(trainData); + System.out.println(base); + + //evaluate + System.out.println("evaluating..."); + Evaluation eval = new Evaluation(trainData); + + eval.evaluateModel(base, testData); + System.out.println(eval.toSummaryString()); + System.out.println(eval.toClassDetailsString()); + System.out.println(eval.toMatrixString()); + + //output txt 
results + List classifiedAsPHIList = printSummary(base, eval, testData, orig); + + //update db w/ classification + recordClassification(classifiedAsPHIList); + } + + private void recordClassification(List classifiedAsPHIList) throws Exception + { + String[] keys; + int id; + String classifiedAs; + FeatureMatrixDAO dao = new FeatureMatrixDAO(tableSuffix); + + for (String s : classifiedAsPHIList) + { + keys = s.split("\\|"); + if (keys.length!=2) + { + System.out.println("ERROR: unable to record classification, insufficient number of keys for '"+s+"'."); + } + else + { + id = Integer.parseInt(keys[0]); + classifiedAs = keys[1]; + + dao.updateClassification(classifiedAs, id); + } + } + } + + private List printSummary(Classifier base, Evaluation eval, Instances data, Instances orig) throws Exception + { + //return list of cases classified as PHI + List classifiedAsPHIList = new ArrayList(); + + // output evaluation + System.out.println(); + System.out.println("=== Setup ==="); + System.out.println("Classifier: " + getClassifier().getClass().getName() + " " + Utils.joinOptions(base.getOptions())); + System.out.println("Dataset: " + data.relationName()); + System.out.println(); + + // output predictions + int totalMisclass = 0; + int totalPHIClass = 0; + int totalNonPHIClass = 0; + System.out.println("# -\t actual -\t predicted -\t token"); + for (int i = 0; i < data.numInstances(); i++) + { + double pred = base.classifyInstance(data.instance(i)); + double actual = data.instance(i).classValue(); + String predString = data.classAttribute().value((int) pred); +// double[] dist = base.distributionForInstance(data.instance(i)); + + //save data for cases classified as PHI + //if (pred>0) + if(!predString.equalsIgnoreCase("NA")) + { + totalPHIClass++; + classifiedAsPHIList.add(orig.instance(i).stringValue(0)+"|"+predString); + } + else + { + totalNonPHIClass++; + } + + //output misclassifications + if (pred != actual && predString.equalsIgnoreCase("NA")) +// if (pred != actual 
&& actual>1) + { + totalMisclass++; + System.out.print((i+1)); + System.out.print(" -\t "); + System.out.print(data.instance(i).toString(data.classIndex())); + System.out.print(" -\t "); + System.out.print(predString); + System.out.print(" -\t "); + //System.out.print(data.instance(i)); //comment out classified instance. + System.out.println(orig.instance(i).stringValue(0)); //show identifying part of the instance base on original instance data. + //System.out.print("\t\t\t"); + System.out.println(); + } + } + + System.out.println("total misclassifications: " + totalMisclass); + System.out.println(eval.toSummaryString()); + System.out.println(eval.toClassDetailsString()); + System.out.println(eval.toMatrixString()); + + System.out.println("total PHI class: " + totalPHIClass); + System.out.println("total non-PHI class: " + totalNonPHIClass); + + return classifiedAsPHIList; + } + + /* + * filters - only initialize once or it causes problems running on test/train sets + */ + private Filter getRemoveFilter(Instances data) throws Exception + { + if (removeFilter == null) + { + removeFilter = new Remove(); + System.out.println("\tExecuting Remove Filter..."); + String[] options = new String[2]; + options[0] = "-R"; + options[1] = "1"; + removeFilter.setOptions(options); + removeFilter.setInputFormat(data); + } + return removeFilter; + } + + public Classifier getClassifier() throws Exception + { + return getClassifier(ScrubberProperties.getClassificationCostMatrix()); + } + + public Classifier getClassifier(String classificationCostMatrix) throws Exception + { + if (classifier==null) + { + classifier = new CostSensitiveClassifier(); + String[] options = new String[11]; + int i=0; + options[i++] = "-cost-matrix"; + options[i++] = classificationCostMatrix; + options[i++] = "-S"; + options[i++] = "1"; + options[i++] = "-W"; + options[i++] = "weka.classifiers.trees.J48"; + options[i++] = "--"; + options[i++] = "-C"; + options[i++] = "0.25 "; + options[i++] = "-M"; + 
options[i++] = "2"; + + classifier.setOptions(options); + } + + return classifier; + } + + public String getTrainModelFilepath() + { + return trainModelFilepath; + } + + public void setTrainModelFilepath(String trainModelFilepath) + { + this.trainModelFilepath = trainModelFilepath; + } + + public String getTestModelFilepath() + { + return testModelFilepath; + } + + public void setTestModelFilepath(String testModelFilepath) + { + this.testModelFilepath = testModelFilepath; + } + +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaClassifier.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractor.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractor.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractor.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractor.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,131 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +package org.spin.scrubber.classification; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Scanner; + +import org.apache.log4j.Logger; + +/** + * + * @author britt fitch + * + */ +public abstract class WekaDataExtractor +{ + private static Logger log = Logger.getLogger(WekaDataExtractor.class); + + private String dirModels = null; + private String modelName = null; + private String tableSuffix = null; + + public WekaDataExtractor(String dirModels, String modelName) + { + this.dirModels = dirModels; + this.modelName = modelName; + } + + public void writeFile(String pathToFile, String content) throws IOException + { + Writer out = null; + try + { + out = new OutputStreamWriter(new FileOutputStream(pathToFile)); + out.write(content); + } + catch (IOException e) + { + log.error("Unable to write to file: " + pathToFile, e); + throw e; + } + finally + { + out.close(); + } + } + + public String readFile(String pathToFile) throws FileNotFoundException + { + StringBuilder text = new StringBuilder(); + String NL = System.getProperty("line.separator"); + Scanner scanner = null; + try + { + scanner = new Scanner(new FileInputStream(pathToFile)); + while (scanner.hasNextLine()) + { + text.append(scanner.nextLine() + NL); + } + } + catch (FileNotFoundException e) + { + log.error("Unable to read 
file: " + pathToFile, e); + throw e; + } + finally + { + scanner.close(); + } + + return text.toString(); + } + + public abstract void generateModel() throws Exception; + + /** + * Delete old model. + * @param pathToFile - file path of the file to be deleted. + */ + protected void deleteModel(String pathToFile) + { + File model = new File(pathToFile); + if(model.exists()) + { + log.info("deleting model: " + pathToFile); + model.delete(); + } + } + + public String getDirModels() + { + return dirModels; + } + + public void setDirModels(String dirModels) + { + this.dirModels = dirModels; + } + + public String getModelName() + { + return modelName; + } + + public void setModelName(String modelName) + { + this.modelName = modelName; + } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractor.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTest.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTest.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTest.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTest.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,78 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +package org.spin.scrubber.classification; + +import org.apache.log4j.Logger; +import org.spin.scrubber.ScrubberProperties; +import org.spin.scrubber.uima.dao.FeatureMatrixDAO; + +import java.io.File; +import java.util.List; + +/** + * + * @author britt fitch + * + */ +public class WekaDataExtractorTest extends WekaDataExtractor +{ + private static Logger log = Logger.getLogger(WekaDataExtractorTest.class); + private String tableSuffix = "_test"; + + public WekaDataExtractorTest() + { + super(ScrubberProperties.getDirModels(), ScrubberProperties.getFileModelTest()); + } + + public static void main(String[] args) throws Exception + { + WekaDataExtractor extractor = new WekaDataExtractorTest(); + extractor.generateModel(); + } + + public void generateModel() throws Exception + { + String pathToArff = getDirModels() + File.separator + getModelName(); + + //delete old arff + deleteModel(pathToArff); + + //get weka header + StringBuilder sb = new StringBuilder(readFile(getDirModels() + File.separator + "weka_header.txt")); //TODO: refactor + + //select records for output model + List rows = new FeatureMatrixDAO(tableSuffix).selectDataSetTest(); + + //clean file according to .sed + for (String row : rows) + { + row = row.replaceAll(",',", ",apos,"); + row = row.replaceAll(",,,", ",comma,"); + row = 
row.replaceAll(",\\.,", ",period,"); + row = row.replaceAll(",:,", ",colon,"); + row = row.replaceAll(",\\(,", ",none,"); + row = row.replaceAll(",\\),", ",none,"); + row = row.replaceAll(",\\$,", ",none,"); + sb.append(row); + sb.append("\n"); + } + + writeFile(pathToArff, sb.toString()); + } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTest.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTrain.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTrain.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTrain.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTrain.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,78 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +package org.spin.scrubber.classification; + +import java.io.File; +import java.util.List; + +import org.apache.log4j.Logger; +import org.spin.scrubber.ScrubberProperties; +import org.spin.scrubber.uima.dao.FeatureMatrixDAO; + +/** + * + * @author britt fitch + * + */ +public class WekaDataExtractorTrain extends WekaDataExtractor +{ + private static Logger log = Logger.getLogger(WekaDataExtractorTrain.class); + private String tableSuffix = "_train"; + + public WekaDataExtractorTrain() + { + super(ScrubberProperties.getDirModels(), ScrubberProperties.getFileModelTrain()); + } + + public static void main(String[] args) throws Exception + { + WekaDataExtractor extractor = new WekaDataExtractorTrain(); + extractor.generateModel(); + } + + public void generateModel() throws Exception + { + String pathToArff = getDirModels() + File.separator + getModelName(); + + //delete old arff + deleteModel(pathToArff); + + //get weka header + StringBuilder sb = new StringBuilder(readFile(getDirModels() + File.separator + "weka_header.txt")); //TODO: refactor + + //select records for output model + List rows = new FeatureMatrixDAO(tableSuffix).selectDataSetTrain(); + + //clean file according to .sed + for (String row : rows) + { + row = row.replaceAll(",',", ",apos,"); + row = row.replaceAll(",,,", ",comma,"); + row = row.replaceAll(",\\.,", ",period,"); + row = row.replaceAll(",:,", ",colon,"); + row = row.replaceAll(",\\(,", ",none,"); + row = row.replaceAll(",\\),", ",none,"); + row = row.replaceAll(",\\$,", ",none,"); + sb.append(row); + sb.append("\n"); + } + + writeFile(pathToArff, sb.toString()); + } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/classification/WekaDataExtractorTrain.java 
------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/AnnotationsPubsPosCounter.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/AnnotationsPubsPosCounter.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/AnnotationsPubsPosCounter.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/AnnotationsPubsPosCounter.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,60 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ ******************************************************************************/ +package org.spin.scrubber.oneoff; + +import org.spin.scrubber.uima.dao.AnnotationsPubsDAO; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Deprecated +public class AnnotationsPubsPosCounter +{ + + /** + * @param args + * @throws Exception + * + * this class was used to generate a distribution of parts of speech across the set of pubs + * for comparison with the distribution of part of speech across the cases + * and the distribution of PoS for known phi (based on gold standard) + */ + public static void main(String[] args) throws Exception + { + //AnnotationsPubsPosCounter runner = new AnnotationsPubsPosCounter(); + Map<String, Integer> pubPosMap = new HashMap<String, Integer>(); + AnnotationsPubsDAO dao = new AnnotationsPubsDAO(); + List<String> fileList = dao.selectDistinctFilenameShort(); + + //sum pos for pubs + for (String filename : fileList) + { + pubPosMap = dao.selectDistinctPOS(pubPosMap, filename); + } + + //insert pos for pubs + for (String pos : pubPosMap.keySet()) + { + int posCnt = pubPosMap.get(pos); + dao.insertPubsPOS(pos, posCnt); + } + } + +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/AnnotationsPubsPosCounter.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeaturePHITypeUpdater.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeaturePHITypeUpdater.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeaturePHITypeUpdater.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeaturePHITypeUpdater.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,90 @@ 
+/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +package org.spin.scrubber.oneoff; + +import org.spin.scrubber.beans.CaseFeature; +import org.spin.scrubber.uima.dao.FeatureMatrixDAO; +import org.spin.scrubber.uima.dao.HumanAnnotationsDAO; + +import java.util.List; + +@Deprecated +public class CaseFeaturePHITypeUpdater //implements Runnable +{ + +// public static void main(String[] args) throws Exception +// { +// CaseFeaturePHITypeUpdater runner = new CaseFeaturePHITypeUpdater(); +// runner.run(); +// } +// +// public void run() +// { +// FeatureMatrixDAO cfDAO; +// HumanAnnotationsDAO phiDao; +// try +// { +// cfDAO = new FeatureMatrixDAO(); +// phiDao = new HumanAnnotationsDAO(); +// +//// //update TRAIN set +//// List caseFeatureList = cfDAO.selectAllCaseFeatures(); +//// System.out.println("INFO: " + caseFeatureList.size() + " train instances to be updated..."); +//// for (CaseFeature cf : caseFeatureList) +//// { +//// try +//// { +//// String phiLabel = phiDao.selectPHIType(cf.getFilename_short(), cf.getStartIdx()); +//// if (phiLabel!=null) +//// { 
+//// cfDAO.updateCaseFeaturePHITypeTrain(cf.getId(), phiLabel); +//// } +//// } +//// catch(Exception e) +//// { +//// System.out.println("ERROR: (train) token|id: " +cf.getToken()+"|"+cf.getId() ); +//// e.printStackTrace(); +//// } +//// } +// +// //update TEST set +// List<CaseFeature> caseFeatureTESTList = cfDAO.selectAllTestCaseFeatures(); +// System.out.println("INFO: " + caseFeatureTESTList.size() + " test instances to be updated..."); +// for (CaseFeature cf : caseFeatureTESTList) +// { +// try +// { +// String phiLabel = phiDao.selectPHIType(cf.getFilename_short(), cf.getStartIdx()); +// if (phiLabel!=null) +// { +// cfDAO.updateCaseFeaturePHITypeTest(cf.getId(), phiLabel); +// } +// } +// catch(Exception e) +// { +// System.out.println("ERROR: (test) token|id: " +cf.getToken()+"|"+cf.getId() ); +// e.printStackTrace(); +// } +// } +// } catch (Exception e1) +// { +// e1.printStackTrace(); +// } +// } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeaturePHITypeUpdater.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeatureTFUpdater.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeatureTFUpdater.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeatureTFUpdater.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeatureTFUpdater.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,129 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +/** + * + */ +package org.spin.scrubber.oneoff; + +import org.spin.scrubber.beans.CaseFeature; +import org.spin.scrubber.uima.dao.FeatureMatrixDAO; +import org.spin.scrubber.uima.dao.TfDAO; + +import java.util.List; +import java.util.Map; + +/** + * This class is intended to be run as a one-off process (thus the package *.oneoff) to update the TF features. + * The normal process of calculating the TF occurs in TFAnnotator. 
+ * + * @author BF19 + * + */ +@Deprecated +public class CaseFeatureTFUpdater //implements Runnable +{ +// public static void main(String[] args) throws Exception +// { +// CaseFeatureTFUpdater runner = new CaseFeatureTFUpdater(); +// runner.run(); +// } +// +// public void run() +// { +// //select all pub token/cnt/pos +// Map pubsTFMap; +// try +// { +// pubsTFMap = new TfDAO().selectPubTFMap(); +// +// updateTrain(pubsTFMap); +// updateTest(pubsTFMap); +// } +// catch (Exception e) +// { +// e.printStackTrace(); +// } +// } +// +// private void updateTrain(Map pubsTFMap) +// { +// FeatureMatrixDAO cfDAO; +// try +// { +// cfDAO = new FeatureMatrixDAO(); +// +// //select all feature records (to be updated) +// List caseFeatureList = cfDAO.selectAllCaseFeatures(); +// +// for (CaseFeature cf : caseFeatureList) +// { +// try +// { +// //update all_pubs features +// int pubTermPosCnt = (pubsTFMap.get(cf.getToken()+"|"+cf.getPos())==null) ? 0 : pubsTFMap.get(cf.getToken()+"|"+cf.getPos()); +// int pubTermCnt = (pubsTFMap.get(cf.getToken())==null) ? 0 : pubsTFMap.get(cf.getToken()); +// float pubTotalCnt = Float.valueOf(Integer.toString(pubsTFMap.get("totalPubCount"))); +// cfDAO.updateCaseFeatureTFAllPubs(cf.getId(), pubTermPosCnt/pubTotalCnt, pubTermCnt/pubTotalCnt); +// } +// catch(Exception e) +// { +// System.out.println("ERROR: token|id: " +cf.getToken()+"|"+cf.getId() ); +// e.printStackTrace(); +// } +// } +// } +// catch (Exception e1) +// { +// e1.printStackTrace(); +// } +// } +// +// private void updateTest(Map pubsTFMap) +// { +// FeatureMatrixDAO cfDAO; +// try +// { +// cfDAO = new FeatureMatrixDAO(); +// +// //select all feature records (to be updated) +// List caseFeatureList = cfDAO.selectAllTestCaseFeatures(); +// +// for (CaseFeature cf : caseFeatureList) +// { +// try +// { +// //update all_pubs features +// int pubTermPosCnt = (pubsTFMap.get(cf.getToken()+"|"+cf.getPos())==null) ? 
0 : pubsTFMap.get(cf.getToken()+"|"+cf.getPos()); +// int pubTermCnt = (pubsTFMap.get(cf.getToken())==null) ? 0 : pubsTFMap.get(cf.getToken()); +// float pubTotalCnt = Float.valueOf(Integer.toString(pubsTFMap.get("totalPubCount"))); +// cfDAO.updateTestCaseFeatureTFAllPubs(cf.getId(), pubTermPosCnt/pubTotalCnt, pubTermCnt/pubTotalCnt); +// } +// catch(Exception e) +// { +// System.out.println("ERROR: token|id: " +cf.getToken()+"|"+cf.getId() ); +// e.printStackTrace(); +// } +// } +// } +// catch (Exception e1) +// { +// e1.printStackTrace(); +// } +// } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/CaseFeatureTFUpdater.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/XmlToTextI2B2.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/XmlToTextI2B2.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/XmlToTextI2B2.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/XmlToTextI2B2.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,847 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +/** + * + */ +package org.spin.scrubber.oneoff; + +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathFactory; +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.List; + +/** + * @author britt fitch bf19 + * + * takes 2 command line params: + * inDirectory : containing xml files + * outDirectory : where to place txt files (assumes this dir has "train" and "test" subdirs) + * + * parses i2b2 xml file into individual text files for use by scrubber. + * + * THIS IS REQUIRED TO REPRODUCE FINDINGS REPORTED IN THE PAPER. 
+ */ +public class XmlToTextI2B2 implements Runnable +{ + private String inDirectory; + private String outDirectory; + private List allTrainCaseList; + + public XmlToTextI2B2(String in, String out) + { + this.inDirectory = in; + this.outDirectory = out; + } + + public void run() + { + try + { + File inDir = new File(inDirectory); + + if (!inDir.exists()) + { + inDir.createNewFile(); + } + + File[] files = inDir.listFiles(); + + for (File f : files) + { + if (f.isDirectory()) + { + continue; + } + + System.out.println("XmlToText for: " + f.getName()); + + //read infile + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(f); + XPathFactory xPathfactory = XPathFactory.newInstance(); + XPath xpath = xPathfactory.newXPath(); + XPathExpression expr = xpath.compile("//TEXT"); + + //read all matching nodes + NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET); + + FileWriter writer = null; + + //for each node in a file, write out to a flat txt file of the same name. 
+ for (int i=0; i getAllTrainCaseList() + { + if (allTrainCaseList ==null) + { + allTrainCaseList = new ArrayList(); + allTrainCaseList.add("1"); + allTrainCaseList.add("10"); + allTrainCaseList.add("100"); + allTrainCaseList.add("101"); + allTrainCaseList.add("102"); + allTrainCaseList.add("103"); + allTrainCaseList.add("104"); + allTrainCaseList.add("105"); + allTrainCaseList.add("106"); + allTrainCaseList.add("107"); + allTrainCaseList.add("108"); + allTrainCaseList.add("11"); + allTrainCaseList.add("110"); + allTrainCaseList.add("112"); + allTrainCaseList.add("113"); + allTrainCaseList.add("114"); + allTrainCaseList.add("115"); + allTrainCaseList.add("116"); + allTrainCaseList.add("117"); + allTrainCaseList.add("118"); + allTrainCaseList.add("119"); + allTrainCaseList.add("12"); + allTrainCaseList.add("120"); + allTrainCaseList.add("122"); + allTrainCaseList.add("123"); + allTrainCaseList.add("124"); + allTrainCaseList.add("125"); + allTrainCaseList.add("126"); + allTrainCaseList.add("127"); + allTrainCaseList.add("128"); + allTrainCaseList.add("129"); + allTrainCaseList.add("13"); + allTrainCaseList.add("130"); + allTrainCaseList.add("131"); + allTrainCaseList.add("132"); + allTrainCaseList.add("134"); + allTrainCaseList.add("137"); + allTrainCaseList.add("138"); + allTrainCaseList.add("139"); + allTrainCaseList.add("140"); + allTrainCaseList.add("141"); + allTrainCaseList.add("143"); + allTrainCaseList.add("144"); + allTrainCaseList.add("145"); + allTrainCaseList.add("146"); + allTrainCaseList.add("147"); + allTrainCaseList.add("148"); + allTrainCaseList.add("149"); + allTrainCaseList.add("15"); + allTrainCaseList.add("150"); + allTrainCaseList.add("152"); + allTrainCaseList.add("153"); + allTrainCaseList.add("154"); + allTrainCaseList.add("155"); + allTrainCaseList.add("156"); + allTrainCaseList.add("157"); + allTrainCaseList.add("158"); + allTrainCaseList.add("159"); + allTrainCaseList.add("16"); + allTrainCaseList.add("160"); + allTrainCaseList.add("161"); 
+ allTrainCaseList.add("162"); + allTrainCaseList.add("163"); + allTrainCaseList.add("164"); + allTrainCaseList.add("165"); + allTrainCaseList.add("166"); + allTrainCaseList.add("169"); + allTrainCaseList.add("17"); + allTrainCaseList.add("170"); + allTrainCaseList.add("171"); + allTrainCaseList.add("172"); + allTrainCaseList.add("173"); + allTrainCaseList.add("174"); + allTrainCaseList.add("175"); + allTrainCaseList.add("178"); + allTrainCaseList.add("179"); + allTrainCaseList.add("18"); + allTrainCaseList.add("180"); + allTrainCaseList.add("181"); + allTrainCaseList.add("182"); + allTrainCaseList.add("183"); + allTrainCaseList.add("184"); + allTrainCaseList.add("186"); + allTrainCaseList.add("187"); + allTrainCaseList.add("188"); + allTrainCaseList.add("189"); + allTrainCaseList.add("19"); + allTrainCaseList.add("190"); + allTrainCaseList.add("191"); + allTrainCaseList.add("192"); + allTrainCaseList.add("193"); + allTrainCaseList.add("195"); + allTrainCaseList.add("196"); + allTrainCaseList.add("197"); + allTrainCaseList.add("198"); + allTrainCaseList.add("199"); + allTrainCaseList.add("2"); + allTrainCaseList.add("20"); + allTrainCaseList.add("200"); + allTrainCaseList.add("201"); + allTrainCaseList.add("203"); + allTrainCaseList.add("204"); + allTrainCaseList.add("205"); + allTrainCaseList.add("207"); + allTrainCaseList.add("208"); + allTrainCaseList.add("209"); + allTrainCaseList.add("21"); + allTrainCaseList.add("210"); + allTrainCaseList.add("211"); + allTrainCaseList.add("212"); + allTrainCaseList.add("213"); + allTrainCaseList.add("215"); + allTrainCaseList.add("216"); + allTrainCaseList.add("217"); + allTrainCaseList.add("218"); + allTrainCaseList.add("219"); + allTrainCaseList.add("22"); + allTrainCaseList.add("221"); + allTrainCaseList.add("222"); + allTrainCaseList.add("223"); + allTrainCaseList.add("224"); + allTrainCaseList.add("225"); + allTrainCaseList.add("226"); + allTrainCaseList.add("227"); + allTrainCaseList.add("228"); + 
allTrainCaseList.add("229"); + allTrainCaseList.add("23"); + allTrainCaseList.add("230"); + allTrainCaseList.add("231"); + allTrainCaseList.add("232"); + allTrainCaseList.add("234"); + allTrainCaseList.add("235"); + allTrainCaseList.add("236"); + allTrainCaseList.add("237"); + allTrainCaseList.add("238"); + allTrainCaseList.add("239"); + allTrainCaseList.add("24"); + allTrainCaseList.add("240"); + allTrainCaseList.add("241"); + allTrainCaseList.add("242"); + allTrainCaseList.add("243"); + allTrainCaseList.add("244"); + allTrainCaseList.add("245"); + allTrainCaseList.add("246"); + allTrainCaseList.add("247"); + allTrainCaseList.add("248"); + allTrainCaseList.add("249"); + allTrainCaseList.add("250"); + allTrainCaseList.add("251"); + allTrainCaseList.add("252"); + allTrainCaseList.add("253"); + allTrainCaseList.add("254"); + allTrainCaseList.add("255"); + allTrainCaseList.add("256"); + allTrainCaseList.add("257"); + allTrainCaseList.add("258"); + allTrainCaseList.add("259"); + allTrainCaseList.add("26"); + allTrainCaseList.add("260"); + allTrainCaseList.add("261"); + allTrainCaseList.add("262"); + allTrainCaseList.add("264"); + allTrainCaseList.add("265"); + allTrainCaseList.add("266"); + allTrainCaseList.add("267"); + allTrainCaseList.add("269"); + allTrainCaseList.add("27"); + allTrainCaseList.add("270"); + allTrainCaseList.add("271"); + allTrainCaseList.add("272"); + allTrainCaseList.add("273"); + allTrainCaseList.add("274"); + allTrainCaseList.add("275"); + allTrainCaseList.add("276"); + allTrainCaseList.add("277"); + allTrainCaseList.add("278"); + allTrainCaseList.add("279"); + allTrainCaseList.add("28"); + allTrainCaseList.add("280"); + allTrainCaseList.add("281"); + allTrainCaseList.add("282"); + allTrainCaseList.add("283"); + allTrainCaseList.add("284"); + allTrainCaseList.add("285"); + allTrainCaseList.add("286"); + allTrainCaseList.add("287"); + allTrainCaseList.add("288"); + allTrainCaseList.add("289"); + allTrainCaseList.add("29"); + 
allTrainCaseList.add("290"); + allTrainCaseList.add("291"); + allTrainCaseList.add("292"); + allTrainCaseList.add("293"); + allTrainCaseList.add("294"); + allTrainCaseList.add("295"); + allTrainCaseList.add("296"); + allTrainCaseList.add("297"); + allTrainCaseList.add("299"); + allTrainCaseList.add("3"); + allTrainCaseList.add("30"); + allTrainCaseList.add("300"); + allTrainCaseList.add("301"); + allTrainCaseList.add("302"); + allTrainCaseList.add("303"); + allTrainCaseList.add("304"); + allTrainCaseList.add("305"); + allTrainCaseList.add("306"); + allTrainCaseList.add("307"); + allTrainCaseList.add("308"); + allTrainCaseList.add("309"); + allTrainCaseList.add("31"); + allTrainCaseList.add("310"); + allTrainCaseList.add("311"); + allTrainCaseList.add("312"); + allTrainCaseList.add("313"); + allTrainCaseList.add("314"); + allTrainCaseList.add("315"); + allTrainCaseList.add("316"); + allTrainCaseList.add("317"); + allTrainCaseList.add("318"); + allTrainCaseList.add("32"); + allTrainCaseList.add("320"); + allTrainCaseList.add("321"); + allTrainCaseList.add("322"); + allTrainCaseList.add("323"); + allTrainCaseList.add("324"); + allTrainCaseList.add("325"); + allTrainCaseList.add("326"); + allTrainCaseList.add("327"); + allTrainCaseList.add("329"); + allTrainCaseList.add("33"); + allTrainCaseList.add("330"); + allTrainCaseList.add("331"); + allTrainCaseList.add("332"); + allTrainCaseList.add("333"); + allTrainCaseList.add("334"); + allTrainCaseList.add("335"); + allTrainCaseList.add("336"); + allTrainCaseList.add("337"); + allTrainCaseList.add("338"); + allTrainCaseList.add("339"); + allTrainCaseList.add("34"); + allTrainCaseList.add("340"); + allTrainCaseList.add("341"); + allTrainCaseList.add("342"); + allTrainCaseList.add("343"); + allTrainCaseList.add("344"); + allTrainCaseList.add("345"); + allTrainCaseList.add("346"); + allTrainCaseList.add("347"); + allTrainCaseList.add("348"); + allTrainCaseList.add("349"); + allTrainCaseList.add("350"); + 
allTrainCaseList.add("351"); + allTrainCaseList.add("352"); + allTrainCaseList.add("354"); + allTrainCaseList.add("355"); + allTrainCaseList.add("356"); + allTrainCaseList.add("357"); + allTrainCaseList.add("358"); + allTrainCaseList.add("359"); + allTrainCaseList.add("36"); + allTrainCaseList.add("360"); + allTrainCaseList.add("361"); + allTrainCaseList.add("362"); + allTrainCaseList.add("363"); + allTrainCaseList.add("364"); + allTrainCaseList.add("366"); + allTrainCaseList.add("367"); + allTrainCaseList.add("368"); + allTrainCaseList.add("369"); + allTrainCaseList.add("37"); + allTrainCaseList.add("370"); + allTrainCaseList.add("372"); + allTrainCaseList.add("373"); + allTrainCaseList.add("374"); + allTrainCaseList.add("375"); + allTrainCaseList.add("376"); + allTrainCaseList.add("378"); + allTrainCaseList.add("379"); + allTrainCaseList.add("38"); + allTrainCaseList.add("380"); + allTrainCaseList.add("381"); + allTrainCaseList.add("382"); + allTrainCaseList.add("383"); + allTrainCaseList.add("384"); + allTrainCaseList.add("385"); + allTrainCaseList.add("386"); + allTrainCaseList.add("387"); + allTrainCaseList.add("388"); + allTrainCaseList.add("389"); + allTrainCaseList.add("39"); + allTrainCaseList.add("390"); + allTrainCaseList.add("391"); + allTrainCaseList.add("392"); + allTrainCaseList.add("393"); + allTrainCaseList.add("394"); + allTrainCaseList.add("395"); + allTrainCaseList.add("396"); + allTrainCaseList.add("397"); + allTrainCaseList.add("398"); + allTrainCaseList.add("399"); + allTrainCaseList.add("4"); + allTrainCaseList.add("40"); + allTrainCaseList.add("400"); + allTrainCaseList.add("401"); + allTrainCaseList.add("402"); + allTrainCaseList.add("403"); + allTrainCaseList.add("404"); + allTrainCaseList.add("405"); + allTrainCaseList.add("407"); + allTrainCaseList.add("408"); + allTrainCaseList.add("409"); + allTrainCaseList.add("411"); + allTrainCaseList.add("412"); + allTrainCaseList.add("414"); + allTrainCaseList.add("415"); + 
allTrainCaseList.add("416"); + allTrainCaseList.add("417"); + allTrainCaseList.add("418"); + allTrainCaseList.add("419"); + allTrainCaseList.add("42"); + allTrainCaseList.add("421"); + allTrainCaseList.add("43"); + allTrainCaseList.add("434"); + allTrainCaseList.add("44"); + allTrainCaseList.add("45"); + allTrainCaseList.add("452"); + allTrainCaseList.add("46"); + allTrainCaseList.add("464"); + allTrainCaseList.add("468"); + allTrainCaseList.add("47"); + allTrainCaseList.add("48"); + allTrainCaseList.add("485"); + allTrainCaseList.add("49"); + allTrainCaseList.add("497"); + allTrainCaseList.add("5"); + allTrainCaseList.add("50"); + allTrainCaseList.add("51"); + allTrainCaseList.add("52"); + allTrainCaseList.add("53"); + allTrainCaseList.add("54"); + allTrainCaseList.add("55"); + allTrainCaseList.add("57"); + allTrainCaseList.add("58"); + allTrainCaseList.add("59"); + allTrainCaseList.add("6"); + allTrainCaseList.add("60"); + allTrainCaseList.add("61"); + allTrainCaseList.add("62"); + allTrainCaseList.add("63"); + allTrainCaseList.add("64"); + allTrainCaseList.add("640"); + allTrainCaseList.add("641"); + allTrainCaseList.add("642"); + allTrainCaseList.add("643"); + allTrainCaseList.add("644"); + allTrainCaseList.add("645"); + allTrainCaseList.add("646"); + allTrainCaseList.add("647"); + allTrainCaseList.add("648"); + allTrainCaseList.add("649"); + allTrainCaseList.add("65"); + allTrainCaseList.add("650"); + allTrainCaseList.add("651"); + allTrainCaseList.add("652"); + allTrainCaseList.add("653"); + allTrainCaseList.add("654"); + allTrainCaseList.add("655"); + allTrainCaseList.add("656"); + allTrainCaseList.add("657"); + allTrainCaseList.add("658"); + allTrainCaseList.add("659"); + allTrainCaseList.add("66"); + allTrainCaseList.add("660"); + allTrainCaseList.add("661"); + allTrainCaseList.add("662"); + allTrainCaseList.add("663"); + allTrainCaseList.add("664"); + allTrainCaseList.add("665"); + allTrainCaseList.add("666"); + allTrainCaseList.add("667"); + 
allTrainCaseList.add("668"); + allTrainCaseList.add("669"); + allTrainCaseList.add("67"); + allTrainCaseList.add("670"); + allTrainCaseList.add("671"); + allTrainCaseList.add("672"); + allTrainCaseList.add("673"); + allTrainCaseList.add("674"); + allTrainCaseList.add("675"); + allTrainCaseList.add("676"); + allTrainCaseList.add("677"); + allTrainCaseList.add("678"); + allTrainCaseList.add("679"); + allTrainCaseList.add("68"); + allTrainCaseList.add("680"); + allTrainCaseList.add("681"); + allTrainCaseList.add("682"); + allTrainCaseList.add("683"); + allTrainCaseList.add("684"); + allTrainCaseList.add("685"); + allTrainCaseList.add("686"); + allTrainCaseList.add("687"); + allTrainCaseList.add("688"); + allTrainCaseList.add("689"); + allTrainCaseList.add("69"); + allTrainCaseList.add("690"); + allTrainCaseList.add("691"); + allTrainCaseList.add("692"); + allTrainCaseList.add("693"); + allTrainCaseList.add("694"); + allTrainCaseList.add("695"); + allTrainCaseList.add("696"); + allTrainCaseList.add("697"); + allTrainCaseList.add("698"); + allTrainCaseList.add("699"); + allTrainCaseList.add("7"); + allTrainCaseList.add("70"); + allTrainCaseList.add("700"); + allTrainCaseList.add("701"); + allTrainCaseList.add("702"); + allTrainCaseList.add("703"); + allTrainCaseList.add("704"); + allTrainCaseList.add("705"); + allTrainCaseList.add("707"); + allTrainCaseList.add("708"); + allTrainCaseList.add("709"); + allTrainCaseList.add("71"); + allTrainCaseList.add("710"); + allTrainCaseList.add("711"); + allTrainCaseList.add("712"); + allTrainCaseList.add("713"); + allTrainCaseList.add("714"); + allTrainCaseList.add("715"); + allTrainCaseList.add("716"); + allTrainCaseList.add("717"); + allTrainCaseList.add("718"); + allTrainCaseList.add("719"); + allTrainCaseList.add("72"); + allTrainCaseList.add("720"); + allTrainCaseList.add("721"); + allTrainCaseList.add("722"); + allTrainCaseList.add("723"); + allTrainCaseList.add("724"); + allTrainCaseList.add("725"); + 
allTrainCaseList.add("726"); + allTrainCaseList.add("727"); + allTrainCaseList.add("728"); + allTrainCaseList.add("729"); + allTrainCaseList.add("73"); + allTrainCaseList.add("730"); + allTrainCaseList.add("731"); + allTrainCaseList.add("732"); + allTrainCaseList.add("733"); + allTrainCaseList.add("734"); + allTrainCaseList.add("735"); + allTrainCaseList.add("736"); + allTrainCaseList.add("737"); + allTrainCaseList.add("738"); + allTrainCaseList.add("739"); + allTrainCaseList.add("74"); + allTrainCaseList.add("740"); + allTrainCaseList.add("741"); + allTrainCaseList.add("742"); + allTrainCaseList.add("743"); + allTrainCaseList.add("744"); + allTrainCaseList.add("745"); + allTrainCaseList.add("746"); + allTrainCaseList.add("747"); + allTrainCaseList.add("748"); + allTrainCaseList.add("749"); + allTrainCaseList.add("75"); + allTrainCaseList.add("750"); + allTrainCaseList.add("751"); + allTrainCaseList.add("752"); + allTrainCaseList.add("753"); + allTrainCaseList.add("754"); + allTrainCaseList.add("755"); + allTrainCaseList.add("756"); + allTrainCaseList.add("757"); + allTrainCaseList.add("758"); + allTrainCaseList.add("759"); + allTrainCaseList.add("76"); + allTrainCaseList.add("760"); + allTrainCaseList.add("761"); + allTrainCaseList.add("762"); + allTrainCaseList.add("763"); + allTrainCaseList.add("764"); + allTrainCaseList.add("765"); + allTrainCaseList.add("766"); + allTrainCaseList.add("767"); + allTrainCaseList.add("768"); + allTrainCaseList.add("769"); + allTrainCaseList.add("770"); + allTrainCaseList.add("771"); + allTrainCaseList.add("772"); + allTrainCaseList.add("773"); + allTrainCaseList.add("774"); + allTrainCaseList.add("775"); + allTrainCaseList.add("776"); + allTrainCaseList.add("777"); + allTrainCaseList.add("778"); + allTrainCaseList.add("779"); + allTrainCaseList.add("78"); + allTrainCaseList.add("780"); + allTrainCaseList.add("781"); + allTrainCaseList.add("782"); + allTrainCaseList.add("783"); + allTrainCaseList.add("784"); + 
allTrainCaseList.add("785"); + allTrainCaseList.add("786"); + allTrainCaseList.add("787"); + allTrainCaseList.add("788"); + allTrainCaseList.add("789"); + allTrainCaseList.add("79"); + allTrainCaseList.add("790"); + allTrainCaseList.add("791"); + allTrainCaseList.add("792"); + allTrainCaseList.add("793"); + allTrainCaseList.add("794"); + allTrainCaseList.add("795"); + allTrainCaseList.add("796"); + allTrainCaseList.add("797"); + allTrainCaseList.add("798"); + allTrainCaseList.add("799"); + allTrainCaseList.add("8"); + allTrainCaseList.add("80"); + allTrainCaseList.add("800"); + allTrainCaseList.add("801"); + allTrainCaseList.add("802"); + allTrainCaseList.add("803"); + allTrainCaseList.add("804"); + allTrainCaseList.add("805"); + allTrainCaseList.add("806"); + allTrainCaseList.add("807"); + allTrainCaseList.add("808"); + allTrainCaseList.add("809"); + allTrainCaseList.add("81"); + allTrainCaseList.add("810"); + allTrainCaseList.add("811"); + allTrainCaseList.add("812"); + allTrainCaseList.add("813"); + allTrainCaseList.add("814"); + allTrainCaseList.add("815"); + allTrainCaseList.add("816"); + allTrainCaseList.add("817"); + allTrainCaseList.add("818"); + allTrainCaseList.add("819"); + allTrainCaseList.add("82"); + allTrainCaseList.add("820"); + allTrainCaseList.add("821"); + allTrainCaseList.add("822"); + allTrainCaseList.add("823"); + allTrainCaseList.add("824"); + allTrainCaseList.add("825"); + allTrainCaseList.add("826"); + allTrainCaseList.add("827"); + allTrainCaseList.add("828"); + allTrainCaseList.add("829"); + allTrainCaseList.add("83"); + allTrainCaseList.add("830"); + allTrainCaseList.add("831"); + allTrainCaseList.add("832"); + allTrainCaseList.add("833"); + allTrainCaseList.add("834"); + allTrainCaseList.add("835"); + allTrainCaseList.add("836"); + allTrainCaseList.add("837"); + allTrainCaseList.add("838"); + allTrainCaseList.add("839"); + allTrainCaseList.add("84"); + allTrainCaseList.add("840"); + allTrainCaseList.add("841"); + 
allTrainCaseList.add("842"); + allTrainCaseList.add("843"); + allTrainCaseList.add("844"); + allTrainCaseList.add("845"); + allTrainCaseList.add("846"); + allTrainCaseList.add("847"); + allTrainCaseList.add("848"); + allTrainCaseList.add("849"); + allTrainCaseList.add("85"); + allTrainCaseList.add("850"); + allTrainCaseList.add("851"); + allTrainCaseList.add("852"); + allTrainCaseList.add("853"); + allTrainCaseList.add("854"); + allTrainCaseList.add("855"); + allTrainCaseList.add("856"); + allTrainCaseList.add("857"); + allTrainCaseList.add("858"); + allTrainCaseList.add("859"); + allTrainCaseList.add("86"); + allTrainCaseList.add("860"); + allTrainCaseList.add("861"); + allTrainCaseList.add("862"); + allTrainCaseList.add("863"); + allTrainCaseList.add("864"); + allTrainCaseList.add("865"); + allTrainCaseList.add("866"); + allTrainCaseList.add("867"); + allTrainCaseList.add("868"); + allTrainCaseList.add("869"); + allTrainCaseList.add("87"); + allTrainCaseList.add("870"); + allTrainCaseList.add("871"); + allTrainCaseList.add("872"); + allTrainCaseList.add("873"); + allTrainCaseList.add("874"); + allTrainCaseList.add("875"); + allTrainCaseList.add("876"); + allTrainCaseList.add("877"); + allTrainCaseList.add("878"); + allTrainCaseList.add("879"); + allTrainCaseList.add("88"); + allTrainCaseList.add("880"); + allTrainCaseList.add("881"); + allTrainCaseList.add("882"); + allTrainCaseList.add("883"); + allTrainCaseList.add("884"); + allTrainCaseList.add("885"); + allTrainCaseList.add("886"); + allTrainCaseList.add("887"); + allTrainCaseList.add("888"); + allTrainCaseList.add("889"); + allTrainCaseList.add("89"); + allTrainCaseList.add("890"); + allTrainCaseList.add("891"); + allTrainCaseList.add("892"); + allTrainCaseList.add("893"); + allTrainCaseList.add("894"); + allTrainCaseList.add("895"); + allTrainCaseList.add("896"); + allTrainCaseList.add("897"); + allTrainCaseList.add("898"); + allTrainCaseList.add("899"); + allTrainCaseList.add("9"); + 
allTrainCaseList.add("90"); + allTrainCaseList.add("900"); + allTrainCaseList.add("901"); + allTrainCaseList.add("902"); + allTrainCaseList.add("903"); + allTrainCaseList.add("905"); + allTrainCaseList.add("906"); + allTrainCaseList.add("907"); + allTrainCaseList.add("908"); + allTrainCaseList.add("909"); + allTrainCaseList.add("91"); + allTrainCaseList.add("910"); + allTrainCaseList.add("911"); + allTrainCaseList.add("912"); + allTrainCaseList.add("913"); + allTrainCaseList.add("914"); + allTrainCaseList.add("915"); + allTrainCaseList.add("916"); + allTrainCaseList.add("917"); + allTrainCaseList.add("918"); + allTrainCaseList.add("919"); + allTrainCaseList.add("92"); + allTrainCaseList.add("920"); + allTrainCaseList.add("921"); + allTrainCaseList.add("922"); + allTrainCaseList.add("93"); + allTrainCaseList.add("94"); + allTrainCaseList.add("95"); + allTrainCaseList.add("96"); + allTrainCaseList.add("98"); + allTrainCaseList.add("99"); + } + + return allTrainCaseList; + } + + private boolean isTrainCase(String id) + { + if (this.getAllTrainCaseList().contains(id)) + { + return true; + } + else + { + return false; + } + } + /** + * @param args + * @throws Exception + */ + public static void main(String[] args) throws Exception + { + if (args.length!=2) + { + System.out.println("USAGE:\t\t XmlToText inDir outDir"); + } + + XmlToTextI2B2 runner = new XmlToTextI2B2(args[0], args[1]); + runner.run(); + } + +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/oneoff/XmlToTextI2B2.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotation.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotation.java?rev=1500511&view=auto ============================================================================== --- 
ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotation.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotation.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,72 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ ******************************************************************************/ +package org.spin.scrubber.protege.beans; + +import com.thoughtworks.xstream.annotations.XStreamAlias; + +@XStreamAlias("annotation") +public class Annotation +{ + private Mention mention; + private Annotator annotator; + private Span span; + private String spannedText; + + public String getSpannedText() + { + return spannedText; + } + + public void setSpannedText(String spannedText) + { + this.spannedText = spannedText; + } + + public Mention getMention() + { + return mention; + } + + public void setMention(Mention mention) + { + this.mention = mention; + } + + public void setAnnotator(Annotator annotator) + { + this.annotator = annotator; + } + + public Annotator getAnnotator() + { + return annotator; + } + + public Span getSpan() + { + return span; + } + + public void setSpan(Span span) + { + this.span = span; + } + + +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotation.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotations.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotations.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotations.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotations.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,65 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + ******************************************************************************/ +package org.spin.scrubber.protege.beans; + +import java.util.ArrayList; +import java.util.List; + +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; +import com.thoughtworks.xstream.annotations.XStreamImplicit; + +@XStreamAlias("annotations") +public class Annotations +{ + @XStreamAlias("textSource") + @XStreamAsAttribute + private String textSource; + + @XStreamImplicit(itemFieldName="annotation") + private List annotList = new ArrayList(); + + @XStreamImplicit(itemFieldName="classMention") + private List cmList = new ArrayList(); + + public List getAnnotList() + { + return annotList; + } + public void setAnnotList(List annotList) + { + this.annotList = annotList; + } + public List getCmList() + { + return cmList; + } + public void setCmList(List cmList) + { + this.cmList = cmList; + } + public String getTextSource() + { + return textSource; + } + public void setTextSource(String textSource) + { + this.textSource = textSource; + } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotations.java ------------------------------------------------------------------------------ 
svn:mime-type = text/plain Added: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotator.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotator.java?rev=1500511&view=auto ============================================================================== --- ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotator.java (added) +++ ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotator.java Sun Jul 7 19:23:05 2013 @@ -0,0 +1,60 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ ******************************************************************************/ +package org.spin.scrubber.protege.beans; + +import com.thoughtworks.xstream.annotations.XStreamAlias; +import com.thoughtworks.xstream.annotations.XStreamAsAttribute; +import com.thoughtworks.xstream.annotations.XStreamConverter; +import com.thoughtworks.xstream.converters.extended.ToAttributedValueConverter; + +@XStreamAlias("annotator") +@XStreamConverter(value=ToAttributedValueConverter.class, strings={"name"}) +public class Annotator +{ + @XStreamAlias("id") + @XStreamAsAttribute + private String id; + + private String name; + + public Annotator() + { + } + public Annotator(String id, String name) + { + this.setId(id); + this.setName(name); + } + public String getId() + { + return id; + } + public void setId(String id) + { + this.id = id; + } + public String getName() + { + return name; + } + public void setName(String name) + { + this.name = name; + } +} Propchange: ctakes/sandbox/ctakes-scrubber-deid/src/main/java/org/spin/scrubber/protege/beans/Annotator.java ------------------------------------------------------------------------------ svn:mime-type = text/plain