opennlp-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bgalit...@apache.org
Subject [09/11] opennlp-sandbox git commit: removed stanford nlp refs
Date Tue, 22 Nov 2016 13:05:23 GMT
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifier.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifier.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifier.java
deleted file mode 100644
index d1c80ad..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifier.java
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.kernel_interface;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Logger;
-
-import org.apache.commons.io.FileUtils;
-
-
-import org.apache.tika.Tika;
-import org.apache.tika.exception.TikaException;
-
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-import opennlp.tools.parse_thicket.apps.MultiSentenceSearchResultsProcessor;
-import opennlp.tools.parse_thicket.matching.Matcher;
-
-public class TreeKernelBasedClassifier {
-	protected static Logger LOG = Logger
-			.getLogger("opennlp.tools.similarity.apps.TreeKernelBasedClassifier");
-	protected ArrayList<File> queuePos = new ArrayList<File>(), queueNeg = new ArrayList<File>();
-  
-	protected Matcher matcher = new Matcher();
-	protected TreeKernelRunner tkRunner = new TreeKernelRunner();
-	protected TreeExtenderByAnotherLinkedTree treeExtender = new TreeExtenderByAnotherLinkedTree();
-
-
-	protected String path;
-	public void setKernelPath (String path){
-		this.path=path;
-	}
-	protected static final String modelFileName = "model.txt";
-
-	protected static final String trainingFileName = "training.txt";
-
-	protected static final String unknownToBeClassified = "unknown.txt";
-
-	protected static final String classifierOutput = "classifier_output.txt";
-	protected static final Float MIN_SVM_SCORE_TOBE_IN = 0.2f;
-	
-	/* main entry point to SVM TK classifier
-     * gets a file, reads it outside of CI, extracts longer paragraphs and builds parse thickets for them.
-     * Then parse thicket dump is processed by svm_classify
-     */
-	public Boolean classifyText(File f){
-		FileUtils.deleteQuietly(new File(path+unknownToBeClassified)); 
-		if (!(new File(path+modelFileName).exists())){
-			LOG.severe("Model file '" +modelFileName + "'is absent: skip SVM classification");
-			return null;
-		}
-		Map<Integer, Integer> countObject = new HashMap<Integer, Integer>(); 
-		int itemCount=0, objectCount = 0;
-		List<String> treeBankBuffer = new ArrayList<String>();	
-		List<String> texts=DescriptiveParagraphFromDocExtractor.getLongParagraphsFromFile(f);
-		List<String> lines = formTreeKernelStructuresMultiplePara(texts, "0");
-		for(String l: lines){
-			countObject.put(itemCount, objectCount);
-			itemCount++;
-		}
-		objectCount++;
-		treeBankBuffer.addAll(lines);		
-
-		// write the lists of samples to a file
-		try {
-			FileUtils.writeLines(new File(path+unknownToBeClassified), null, treeBankBuffer);
-		} catch (IOException e) {
-			LOG.severe("Problem creating parse thicket files '"+ path+unknownToBeClassified + "' to be classified\n"+ e.getMessage() );
-		}
-
-		tkRunner.runClassifier(path, unknownToBeClassified, modelFileName, classifierOutput);
-		// read classification results
-		List<String[]> classifResults = ProfileReaderWriter.readProfiles(path+classifierOutput, ' ');
-
-
-		itemCount=0; objectCount = 0;
-		int currentItemCount=0;
-		float accum = 0;
-		LOG.info("\nsvm scores per paragraph: " );
-		for(String[] line: classifResults){
-			Float val = Float.parseFloat(line[0]);
-			System.out.print(val+" ");
-			accum+=val;
-			currentItemCount++;
-		}
-
-		float averaged = accum/(float)currentItemCount;
-		LOG.info("\n average = "+averaged);
-		currentItemCount=0;
-		Boolean in = false;
-		if (averaged> MIN_SVM_SCORE_TOBE_IN)
-			return true;
-		else
-			return false;
-	}
-
-	protected void addFilesPos(File file) {
-
-		if (!file.exists()) {
-			System.out.println(file + " does not exist.");
-		}
-		if (file.isDirectory()) {
-			for (File f : file.listFiles()) {
-				//if (!(f.getName().endsWith(".txt") || f.getName().endsWith(".pdf")))
-				//	continue;
-				addFilesPos(f);
-				System.out.println(f.getName());
-			}
-		} else {
-			queuePos.add(file);
-		}
-	}
-	
-	protected void addFilesNeg(File file) {
-
-		if (!file.exists()) {
-			System.out.println(file + " does not exist.");
-		}
-		if (file.isDirectory()) {
-			for (File f : file.listFiles()) {
-				//if (!(f.getName().endsWith(".txt")||f.getName().endsWith(".pdf")))
-				//	continue;
-				addFilesNeg(f);
-				System.out.println(f.getName());
-			}
-		} else {
-			queueNeg.add(file);
-		}
-	}
-
-	protected void trainClassifier(
-			String posDirectory, String negDirectory) {
-		
-		queuePos.clear(); queueNeg.clear();
-		addFilesPos(new File(posDirectory));
-		addFilesNeg(new File(negDirectory));
-		
-		List<File> filesPos = new ArrayList<File>(queuePos), filesNeg = new ArrayList<File>(queueNeg);
-		
-		List<String[]> treeBankBuffer = new ArrayList<String[]>();
-
-		for (File f : filesPos) {
-			// get first paragraph of text
-			String text=DescriptiveParagraphFromDocExtractor.getFirstParagraphFromFile(f);		
-			treeBankBuffer.add(new String[]{formTreeKernelStructure(text, "1")});		
-		}	
-		for (File f : filesNeg) {
-			// get first paragraph of text
-			String text=DescriptiveParagraphFromDocExtractor.getFirstParagraphFromFile(f);
-			treeBankBuffer.add(new String[]{formTreeKernelStructure(text, "-1")});		
-		}	
-		
-		// write the lists of samples to a file
-		ProfileReaderWriter.writeReport(treeBankBuffer, path+trainingFileName, ' ');
-		// build the model
-		tkRunner.runLearner(path, trainingFileName, modelFileName);
-	}
-
-	public List<String[]> classifyFilesInDirectory(String dirFilesToBeClassified){
-		List<String[]> treeBankBuffer = new ArrayList<String[]>();
-		queuePos.clear();
-		addFilesPos(new File( dirFilesToBeClassified));
-		List<File> filesUnkn = new ArrayList<File>(queuePos);
-		for (File f : filesUnkn) {	
-			String text=DescriptiveParagraphFromDocExtractor.getFirstParagraphFromFile(f);
-			String line = formTreeKernelStructure(text, "0");
-			treeBankBuffer.add(new String[]{line});		
-		}	
-	
-		// form a file from the texts to be classified
-		ProfileReaderWriter.writeReport(treeBankBuffer, path+unknownToBeClassified, ' ');
-		
-		tkRunner.runClassifier(path, unknownToBeClassified, modelFileName, classifierOutput);
-		// read classification results
-		List<String[]> classifResults = ProfileReaderWriter.readProfiles(path+classifierOutput, ' ');
-		// iterate through classification results and set them as scores for hits
-		List<String[]>results = new ArrayList<String[]>();
-		int count=0;
-		for(String[] line: classifResults){
-			Float val = Float.parseFloat(line[0]);
-			Boolean in = false;
-			if (val> MIN_SVM_SCORE_TOBE_IN)
-				in = true;
-			
-			String[] rline = new String[]{filesUnkn.get(count).getName(), in.toString(), line[0], filesUnkn.get(count).getAbsolutePath() }; // treeBankBuffer.get(count).toString() };
-			results.add(rline);
-			count++;
-			
-		}
-		return results;
-
-	}
-
-	protected List<String> formTreeKernelStructuresMultiplePara(List<String> texts, String flag) {
-		List<String> extendedTreesDumpTotal = new ArrayList<String>();
-		try {
-
-			for(String text: texts){
-				// get the parses from original documents, and form the training dataset
-				LOG.info("About to build pt from "+text);
-				ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text);
-				LOG.info("About to build extended forest ");
-				List<String> extendedTreesDump = treeExtender.buildForestForCorefArcs(pt);
-				for(String line: extendedTreesDump)
-					extendedTreesDumpTotal.add(flag + " |BT| "+line + " |ET| ");
-				LOG.info("DONE");
-			}
-
-		} catch (Exception e) {
-			LOG.severe("Problem forming  parse thicket flat file to be classified\n"+ e.getMessage() );
-		}
-		return extendedTreesDumpTotal;
-	}
-	protected String formTreeKernelStructure(String text, String flag) {
-		String treeBankBuffer = "";
-		try {
-			// get the parses from original documents, and form the training dataset
-			LOG.info("About to build pt from "+text);
-			ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text);
-			LOG.info("About to build extended forest ");
-			List<String> extendedTreesDump = treeExtender.buildForestForCorefArcs(pt);
-			LOG.info("DONE");
-
-			treeBankBuffer+=flag;
-			// form the list of training samples
-			for(String t: extendedTreesDump ){
-				if (BracesProcessor.isBalanced(t))
-					treeBankBuffer+=" |BT| "+t;
-				else
-					System.err.println("Wrong tree: " + t);
-			}
-			if (extendedTreesDump.size()<1)
-				treeBankBuffer+=" |BT| ";
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-		return treeBankBuffer+ " |ET|";
-	}
-
-	public static void main(String[] args){
-		VerbNetProcessor p = VerbNetProcessor.
-				getInstance("/Users/borisgalitsky/Documents/workspace/deepContentInspection/src/test/resources"); 
-				
-		TreeKernelBasedClassifier proc = new TreeKernelBasedClassifier();
-		proc.setKernelPath("/Users/borisgalitsky/Documents/tree_kernel/");
-		proc.trainClassifier(args[0], args[1]);
-		List<String[]>res = proc.classifyFilesInDirectory(args[2]);
-		ProfileReaderWriter.writeReport(res, "svmDesignDocReport03minus.csv");
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierMultiplePara.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierMultiplePara.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierMultiplePara.java
deleted file mode 100644
index 45fb98c..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierMultiplePara.java
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.kernel_interface;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Logger;
-
-import org.apache.commons.io.FileUtils;
-
-
-import org.apache.tika.Tika;
-import org.apache.tika.exception.TikaException;
-
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-import opennlp.tools.parse_thicket.apps.MultiSentenceSearchResultsProcessor;
-import opennlp.tools.parse_thicket.matching.Matcher;
-
-public class TreeKernelBasedClassifierMultiplePara extends TreeKernelBasedClassifier{
-	boolean bShortRun = false;
-	public void setShortRun(){
-		bShortRun = true;
-	}
-
-
-	public void trainClassifier(
-			String posDirectory, String negDirectory) {
-
-		queuePos.clear(); queueNeg.clear();
-		addFilesPos(new File(posDirectory));
-		addFilesNeg(new File(negDirectory));
-
-		List<File> filesPos = new ArrayList<File>(queuePos), filesNeg = new ArrayList<File>(queueNeg);
-
-		Collection<String> treeBankBuffer = new ArrayList<String>();
-		int countPos=0, countNeg=0;
-
-		for (File f : filesPos) {
-			// get first paragraph of text
-			List<String> texts=DescriptiveParagraphFromDocExtractor.getLongParagraphsFromFile(f);		
-			List<String> lines = formTreeKernelStructuresMultiplePara(texts, "1");
-			treeBankBuffer.addAll(lines);		
-			if (bShortRun && countPos>3000)
-				break;
-
-			countPos++;
-		}	
-		for (File f : filesNeg) {
-			// get first paragraph of text 
-			List<String> texts=DescriptiveParagraphFromDocExtractor.getLongParagraphsFromFile(f);	
-			List<String> lines = formTreeKernelStructuresMultiplePara(texts, "-1");
-			treeBankBuffer.addAll(lines);	
-			if (bShortRun && countNeg>3000)
-				break;
-
-			countNeg++;
-		}	
-
-		// write the lists of samples to a file
-		try {
-			FileUtils.writeLines(new File(path+trainingFileName), null, treeBankBuffer);
-		} catch (IOException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-		//	ProfileReaderWriter.writeReport(treeBankBuffer, path+trainingFileName, ' ');
-		// build the model
-		tkRunner.runLearner(path, trainingFileName, modelFileName);
-	}
-
-	public List<String[]> classifyFilesInDirectory(String dirFilesToBeClassified){
-		Map<Integer, Integer> countObject = new HashMap<Integer, Integer>(); 
-		int itemCount=0, objectCount = 0;
-		List<String> treeBankBuffer = new ArrayList<String>();
-		queuePos.clear();
-		addFilesPos(new File( dirFilesToBeClassified));
-		List<File> filesUnkn = new ArrayList<File>(queuePos);
-		for (File f : filesUnkn) {	
-			List<String> texts=DescriptiveParagraphFromDocExtractor.getLongParagraphsFromFile(f);
-			List<String> lines = formTreeKernelStructuresMultiplePara(texts, "0");
-			for(String l: lines){
-				countObject.put(itemCount, objectCount);
-				itemCount++;
-			}
-			objectCount++;
-			treeBankBuffer.addAll(lines);		
-		}	
-		// write the lists of samples to a file
-		try {
-			FileUtils.writeLines(new File(path+unknownToBeClassified), null, treeBankBuffer);
-		} catch (IOException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-
-		tkRunner.runClassifier(path, unknownToBeClassified, modelFileName, classifierOutput);
-		// read classification results
-		List<String[]> classifResults = ProfileReaderWriter.readProfiles(path+classifierOutput, ' ');
-		// iterate through classification results and set them as scores for hits
-		List<String[]>results = new ArrayList<String[]>();
-
-		itemCount=0; objectCount = 0;
-		int currentItemCount=0;
-		float accum = 0;
-		for(String[] line: classifResults){
-			Float val = Float.parseFloat(line[0]);
-			accum+=val;
-			// last line
-			Boolean bLastLine = false;
-			if (itemCount==classifResults.size()-1)
-				bLastLine = true;
-
-			if (objectCount== countObject .get(itemCount) /*&& !bLastLine*/){
-				itemCount++; 
-				currentItemCount++;
-				continue;
-			}
-			else while(objectCount!= countObject .get(itemCount)-1){
-				objectCount++;
-				String[] rline = new String[]{filesUnkn.get(objectCount).getName(), "unknown", "0",
-						filesUnkn.get(objectCount).getAbsolutePath() , new Integer(itemCount).toString(), new Integer(objectCount).toString()}; 
-				results.add(rline);
-			}
-			objectCount = countObject.get(itemCount);
-			itemCount++; 
-
-			float averaged = accum/(float)currentItemCount;
-			currentItemCount=0;
-			Boolean in = false;
-			if (averaged> MIN_SVM_SCORE_TOBE_IN)
-				in = true;
-
-			String[] rline = new String[]{filesUnkn.get(objectCount).getName(), in.toString(), new Float(averaged).toString(),
-					filesUnkn.get(objectCount).getAbsolutePath() , new Integer(itemCount).toString(), new Integer(objectCount).toString()}; 
-			results.add(rline);
-			accum=0;
-		}
-		return results;
-
-	}
-
-
-	protected List<String> formTreeKernelStructuresMultiplePara(List<String> texts, String flag) {
-		List<String> extendedTreesDumpTotal = new ArrayList<String>();
-		try {
-			for(String text: texts){
-				// get the parses from original documents, and form the training dataset
-				System.out.println("About to build pt from "+text);
-				ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text);
-				System.out.print("About to build extended forest ");
-				List<String> extendedTreesDump = treeExtender.buildForestForCorefArcs(pt);
-				for(String line: extendedTreesDump)
-					extendedTreesDumpTotal.add(flag + " |BT| "+line + " |ET| ");
-				System.out.println("DONE");
-			}
-
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-		return extendedTreesDumpTotal;
-	}
-
-	public static void main(String[] args){
-		VerbNetProcessor p = VerbNetProcessor.
-				getInstance("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources"); 
-
-		TreeKernelBasedClassifierMultiplePara proc = new TreeKernelBasedClassifierMultiplePara();
-		proc.setKernelPath("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/tree_kernel/");
-		proc.trainClassifier(
-
-				"/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/style_recognizer/txt/ted",
-				"/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/style_recognizer/txt/Tedi");
-
-		//		List<String[]>res = proc.classifyFilesInDirectory(args[2]);
-		//		ProfileReaderWriter.writeReport(res, "svmDesignDocReport05plus.csv");
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierOfDiscourseTree.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierOfDiscourseTree.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierOfDiscourseTree.java
deleted file mode 100644
index 71e8245..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelBasedClassifierOfDiscourseTree.java
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.kernel_interface;
-
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-import opennlp.tools.parse_thicket.external_rst.MatcherExternalRST;
-import opennlp.tools.parse_thicket.external_rst.ParseThicketWithDiscourseTree;
-
-/*
- * This class performs TK learning based on parse thicket which includes RST relations only 
- * based on Surdeanu at al RST parser. It does sentence parsing and NLP pipeline of 
- * Surdeanu's wrapper of Stanford NLP
- */
-public class TreeKernelBasedClassifierOfDiscourseTree extends TreeKernelBasedClassifierMultiplePara{
-
-	private MatcherExternalRST matcherRST = new MatcherExternalRST();
-
-	protected List<String> formTreeKernelStructuresMultiplePara(List<String> texts, String flag) {
-		//TODO
-		//this.setShortRun();	
-		List<String> extendedTreesDumpTotal = new ArrayList<String>();
-		try {
-
-			for(String text: texts){
-				// get the parses from original documents, and form the training dataset
-				try {
-					System.out.print("About to build pt with external rst from "+text + "\n...");
-					ParseThicket pt = matcherRST.buildParseThicketFromTextWithRST(text);
-					if (pt == null)
-						continue;
-					System.out.print("About to build extended forest with external rst...");
-					List<String> extendedTreesDump =  // use direct option (true
-							buildReptresentationForDiscourseTreeAndExtensions((ParseThicketWithDiscourseTree)pt, false);
-									//true);
-					for(String line: extendedTreesDump)
-						extendedTreesDumpTotal.add(flag + " |BT| "+line + " |ET| ");
-					System.out.println("DONE");
-				} catch (Exception e) {
-					e.printStackTrace();
-				}
-			}
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-		return extendedTreesDumpTotal;
-	}
-
-	private List<String> buildReptresentationForDiscourseTreeAndExtensions(ParseThicketWithDiscourseTree pt, boolean bDirectDT){
-		List<String> extendedTreesDump = new ArrayList<String>();
-		if (!bDirectDT)
-			// option 1: use RST relation for extended trees 
-			extendedTreesDump = treeExtender.buildForestForRSTArcs(pt);
-		else {
-			// option 2: use DT directly
-			extendedTreesDump.add(pt.getDtDump());
-		    extendedTreesDump.add(pt.getDtDumpWithPOS());
-		    extendedTreesDump.add(pt.getDtDumpWithEmbeddedTrees());
-		    extendedTreesDump.add(pt.getDtDumpWithVerbNet());
-		}		
-		return extendedTreesDump;
-	}
-	
-	/*
-	 * dtDump
-	 * 1 |BT| (elaboration (joint (attribution (I though) (I d tell you a little about what I like to write )) (joint (And I like to immerse myself in my topics ) (joint (I just like to dive right i) (and become sort of a human guinea pig )))) (elaboration (joint (And I see my life as a series of experiments ) (joint (So , I work for Esquire magazine ) (elaboration (elaboration (and a couple of years ago I wrote an articl) (called My Outsourced Life )) (enablement (where I hired a team of people in Bangalore , India ) (to live my life for me ))))) (elaboration (So they answered my emails ) (They answered my phone )))) |ET|
-	 * 
-	 * getDtDumpWithPOS
-	 * 
-	 *  1 |BT| (elaboration (joint (attribution (I PRP)(thought VBD) (I PRP)(d NN)(tell VBP)(you PRP)(a DT)(little JJ)(about IN)(what WP)(I PRP)(like VBP)(to TO)(write VB)) (joint (And CC)(I PRP)(like VBP)(to TO)(immerse VB)(myself PRP)(in IN)(my PRP$)(topics NNS) (joint (I PRP)(just RB)(like VBP)(to TO)(dive NN)(right NN)(in IN) (and CC)(become VB)(sort NN)(of IN)(a DT)(human JJ)(guinea NN)(pig NN)))) (elaboration (joint (And CC)(I PRP)(see VBP)(my PRP$)(life NN)(as IN)(a DT)(series NN)(of IN)(experiments NNS) (joint (So RB)(I PRP)(work VBP)(for IN)(Esquire NNP)(magazine NN) (elaboration (elaboration (and CC)(a DT)(couple NN)(of IN)(years NNS)(ago IN)(I PRP)(wrote VBD)(an DT)(article NN) (called VBN)(My PRP$)(Outsourced JJ)(Life NNP)) (enablement (where WRB)(I PRP)(hired VBD)(a DT)(team NN)(of IN)(people NNS)(in IN)(Bangalore NNP)(India NNP) (to TO)(live VB)(my PRP$)(life NN)(for IN)(me PRP))))) (elaboration (So IN)(they PRP)(answered VBD)(my PRP$)(emails NNS) (They PRP)(answered VBD)(my PRP$)(phone NN)))) |ET| 
-	 * 
-	 * getDtDumpWithEmbeddedTrees()
-	 * 1 |BT| (elaboration (joint (attribution (SBAR (S (NP (PRP I)) (VP (ADVP (NN d)) (VBP tell) (NP (PRP you)) (PP (NP (DT a) (JJ little)) (IN about) (SBAR (WHNP (WP what)) (S (NP (PRP I)) (VP (VBP like) (S (VP (TO to) (VP (VB write))))))))))) (VBP tell)) (joint (VP (VBP like) (S (VP (TO to) (VP (VB immerse) (NP (PRP myself)) (PP (IN in) (NP (PRP$ my) (NNS topics))))))) (joint (VP (VP (VBP like) (PP (TO to) (NP (NN dive) (NN right))) (PP (IN in))) (CC and) (VP (VB become) (NP (NP (NN sort)) (PP (IN of) (NP (DT a) (JJ human) (NN guinea) (NN pig)))))) (NP (NP (NN sort)) (PP (IN of) (NP (DT a) (JJ human) (NN guinea) (NN pig))))))) (elaboration (joint (VP (VBP see) (NP (PRP$ my) (NN life)) (PP (IN as) (NP (NP (DT a) (NN series)) (PP (IN of) (NP (NNS experiments)))))) (joint (S (NP (PRP I)) (VP (VBP work) (PP (IN for) (NP (NNP Esquire) (NN magazine))))) (elaboration (elaboration (NN couple) (JJ Outsourced)) (enablement (VP (VBP work) (PP (IN for) (NP (NNP Esquire) (NN magazine)))) (NP (PRP$ my) (NN life)))))) (elaboration (VP (VBD answered) (NP (PRP$ my) (NNS emails))) (NP (PRP$ my) (NN phone))))) |ET|
-	 
-	 pt.getDtDumpWithVerbNet()
-	 1 |BT| (elaboration (joint (attribution (I PRP)(thought VBD) (I PRP)(d NN) (tell  (tell-372 tell-372 tell-372 ) (NP V NP NP V NP PP-topic NP V NP S ) (NP NP-PPof-PP NP-S ) ) (you PRP)(a DT)(little JJ)(about IN)(what WP)(I PRP)(like VBP)(to TO)(write VB)) (joint (And CC)(I PRP)(like VBP)(to TO)(immerse VB)(myself PRP)(in IN)(my PRP$)(topics NNS) (joint (I PRP)(just RB)(like VBP)(to TO)(dive NN)(right NN)(in IN) (and CC)(become VB)(sort NN)(of IN)(a DT)(human JJ)(guinea NN)(pig NN)))) (elaboration (joint (And CC)(I PRP) (see  (see-301 see-301 see-301 ) (NP V NP NP V that S NP V NP-ATTR-POS PP-oblique NP V how S NP V what S ) (Basic Transitive S Attribute Object Possessor-Attribute Factoring Alternation HOW-S WHAT-S ) ) (my PRP$)(life NN)(as IN)(a DT)(series NN)(of IN)(experiments NNS) (joint (So RB)(I PRP)(work VBP)(for IN)(Esquire NNP)(magazine NN) (elaboration (elaboration (and CC)(a DT)(couple NN)(of IN)(years NNS)(ago IN)(I PRP)(wrote VBD)(an DT)(article NN) (call  (dub-293 dub-293 dub-293 ) (NP V NP NP NP V NP ) (NP-NP Basic Transitive ) ) (My PRP$)(Outsourced JJ)(Life NNP)) (enablement (where WRB)(I PRP) (hire  (hire-1353 hire-1353 hire-1353 ) (NP V NP NP V NP PP-predicate ) (NP NP-PPas-PP ) ) (a DT)(team NN)(of IN)(people NNS)(in IN)(Bangalore NNP)(India NNP) (to TO)(live VB)(my PRP$)(life NN)(for IN)(me PRP))))) (elaboration (So IN)(they PRP)(answered VBD)(my PRP$)(emails NNS) (They PRP)(answered VBD)(my PRP$)(phone NN)))) |ET|
-	 *
-	 */
-	
-	
-	
-	public static void main(String[] args){
-		VerbNetProcessor p = VerbNetProcessor.
-				getInstance("/Users/borisgalitsky/Documents/workspace/relevance-based-on-parse-trees/src/test/resources"); 
-
-		TreeKernelBasedClassifierOfDiscourseTree proc = new TreeKernelBasedClassifierOfDiscourseTree();
-		proc.setKernelPath("/Users/borisgalitsky/Documents/workspace/relevance-based-on-parse-trees/src/test/resources/tree_kernel/");
-		proc.trainClassifier(
-//				"/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/style_recognizer/txt/ted",
-//				"/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/style_recognizer/txt/Tedi");
-				//"/Users/bgalitsky/Documents/ENRON/enron_random",
-				
-	//			"/Users/bgalitsky/Documents/ENRON/data11_17",
-	//	"/Users/bgalitsky/Documents/ENRON/enron_secrecy"
-	//			"/Users/bgalitsky/Downloads/op_spam_v1.4/positive_polarity/truthful_from_TripAdvisor",
-				"/Users/bgalitsky/Downloads/op_spam_v1.4/negative_polarity/deceptive_from_MTurk",
-				"/Users/bgalitsky/Downloads/op_spam_v1.4/negative_polarity/truthful_from_Web" 
-				);
-				
-//				"/Users/borisgalitsky/Documents/workspace/relevance-based-on-parse-trees/src/test/resources/style_recognizer/txt/ted",
-//				"/Users/borisgalitsky/Documents/workspace/relevance-based-on-parse-trees/src/test/resources/style_recognizer/txt/Tedi");
-
-	}
-
-}
-/*
- * 
-RST - based run
-Number of examples: 6980, linear space size: 10
-ted vs Tedi
-
-estimating ...
-Setting default regularization parameter C=1.0000
-Optimizing...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
- Checking optimality of inactive variables...done.
- Number of inactive variables = 1931
-done. (3597 iterations)
-Optimization finished (78 misclassified, maxdiff=0.00100).
-Runtime in cpu-seconds: 198.37
-Number of SV: 3830 (including 652 at upper bound)
-L1 loss: loss=261.78883
-Norm of weight vector: |w|=41.37067
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=1712.53247
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.05
-XiAlpha-estimate of the error: error<=11.53% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>97.01% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>89.47% (rho=1.00,depth=0)
-Number of kernel evaluations: 73092240
-
-GENERAL RUN (the same set of texts)
-Number of examples: 21146, linear space size: 10
-
-estimating ...
-Setting default regularization parameter C=1.0000
-Optimizing...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
- Checking optimality of inactive variables...done.
- Number of inactive variables = 8849
-done. (5770 iterations)
-Optimization finished (231 misclassified, maxdiff=0.00098).
-Runtime in cpu-seconds: 1486.33
-Number of SV: 5368 (including 940 at upper bound)
-L1 loss: loss=582.99311
-Norm of weight vector: |w|=46.91885
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=2202.37876
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.13
-XiAlpha-estimate of the error: error<=5.57% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>98.42% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>95.18% (rho=1.00,depth=0)
-Number of kernel evaluations: 550748695
-Writing model file...done
-
-
-Number of examples: 7461, linear space size: 10
-
-estimating ...
-Setting default regularization parameter C=1.0000
-Optimizing...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
- Checking optimality of inactive variables...done.
- Number of inactive variables = 2091
-done. (3773 iterations)
-Optimization finished (87 misclassified, maxdiff=0.00096).
-Runtime in cpu-seconds: 231.42
-Number of SV: 4092 (including 680 at upper bound)
-L1 loss: loss=280.03696
-Norm of weight vector: |w|=42.82963
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=1835.37688
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.05
-XiAlpha-estimate of the error: error<=11.54% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>96.75% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>89.59% (rho=1.00,depth=0)
-Number of kernel evaluations: 94432306
-Writing model file...done
-
-
-
-SMALL SET
-
-Number of examples: 172, linear space size: 10
-
-estimating ...
-Setting default regularization parameter C=1.0000
-Optimizing.......................................................done. (56 iterations)
-Optimization finished (0 misclassified, maxdiff=0.00076).
-Runtime in cpu-seconds: 0.01
-Number of SV: 172 (including 59 at upper bound)
-L1 loss: loss=7.38525
-Norm of weight vector: |w|=12.46777
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=156.44537
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.00
-XiAlpha-estimate of the error: error<=44.77% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>79.55% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>54.26% (rho=1.00,depth=0)
-Number of kernel evaluations: 20139
-Writing model file...done
-
-
-LONGER RUN, DTs only
-Number of examples: 720, linear space size: 10
-
-estimating ...
-Setting default regularization parameter C=1.0000
-Optimizing............................................................................................................................................................................................................................................................................
- Checking optimality of inactive variables...done.
- Number of inactive variables = 114
-done. (269 iterations)
-Optimization finished (11 misclassified, maxdiff=0.00096).
-Runtime in cpu-seconds: 0.17
-Number of SV: 712 (including 140 at upper bound)
-L1 loss: loss=117.83422
-Norm of weight vector: |w|=12.73402
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=163.15526
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.00
-XiAlpha-estimate of the error: error<=20.14% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>99.14% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>80.42% (rho=1.00,depth=0)
-Number of kernel evaluations: 283615
-Writing model file...done
-
-HYBRID RUN
-Number of examples: 8301, linear space size: 10
-
-estimating ...
-Setting default regularization parameter C=1.0000
-Optimizing................................
- Checking optimality of inactive variables...done.
- Number of inactive variables = 2323
-done. (4206 iterations)
-Optimization finished (98 misclassified, maxdiff=0.00099).
-Runtime in cpu-seconds: 299.94
-Number of SV: 4870 (including 846 at upper bound)
-L1 loss: loss=398.61389
-Norm of weight vector: |w|=44.95124
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=2021.61414
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.05
-XiAlpha-estimate of the error: error<=12.32% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>97.15% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>88.53% (rho=1.00,depth=0)
-Number of kernel evaluations: 138447398
-Writing model file...done
-
-HYBRID FULL RUN
-
-Number of examples: 2880, linear space size: 10
-
-estimating ...
-Setting default regularization parameter C=1.0021
-Optimizing...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
- Checking optimality of inactive variables...done.
- Number of inactive variables = 1035
-done. (1820 iterations)
-Optimization finished (162 misclassified, maxdiff=0.00099).
-Runtime in cpu-seconds: 1.35
-Number of SV: 1552 (including 556 at upper bound)
-L1 loss: loss=426.90789
-Norm of weight vector: |w|=25.52139
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=652.34149
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.01
-XiAlpha-estimate of the error: error<=23.92% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>92.67% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>80.55% (rho=1.00,depth=0)
-Number of kernel evaluations: 4075095
-Writing model file...done
-
-
-
-
-
-positive vs negative sentiment
-"/Users/bgalitsky/Downloads/op_spam_v1.4/positive_polarity/truthful_from_TripAdvisor",
-				"/Users/bgalitsky/Downloads/op_spam_v1.4/negative_polarity/truthful_from_Web" 
-
-Number of examples: 15930, linear space size: 10
-
-estimating ...
-Setting default regularization parameter C=1.0000
-Optimizing...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
 ............................................................................................................................................................................
- Checking optimality of inactive variables...done.
- Number of inactive variables = 4348
-done. (11130 iterations)
-Optimization finished (14 misclassified, maxdiff=0.00098).
-Runtime in cpu-seconds: 2213.21
-Number of SV: 9219 (including 875 at upper bound)
-L1 loss: loss=126.05211
-Norm of weight vector: |w|=71.25103
-Norm of longest example vector: |x|=1.00000
-Estimated VCdim of classifier: VCdim<=5077.70889
-Computing XiAlpha-estimates...done
-Runtime for XiAlpha-estimates in cpu-seconds: 0.09
-XiAlpha-estimate of the error: error<=10.15% (rho=1.00,depth=0)
-XiAlpha-estimate of the recall: recall=>89.36% (rho=1.00,depth=0)
-XiAlpha-estimate of the precision: precision=>89.85% (rho=1.00,depth=0)
-Number of kernel evaluations: 837061668
-Writing model file...done
- */
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
deleted file mode 100644
index 294fb38..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.kernel_interface;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-
-public class TreeKernelRunner {
-	public void runEXE(String[] command, String runPath){
-		Runtime r = Runtime.getRuntime();
-		Process mStartProcess = null;
-		try {
-			mStartProcess = r.exec( command, null, new File(runPath));
-		} catch (IOException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-
-		StreamLogger outputGobbler = new StreamLogger(mStartProcess.getInputStream());
-		outputGobbler.start();
-
-		try {
-			int returnCode = mStartProcess.waitFor();
-		} catch (InterruptedException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-	}
-
-	public void runLearner(String dir, String learning_file, String  model_file)
-	{
-			if (!dir.endsWith("/"))
-				dir+="/";
-		String[] runString = new String[]{dir+"svm_learn","-t", "5","-j","2","-W","A", dir+learning_file,  dir+model_file};
-		runEXE(runString, dir);
-	}
-	public void runLearnerWin(String dir, String learning_file, String  model_file)
-	{
-		dir = dir.replace('/', '\\');
-		
-		if (!dir.endsWith("\\"))
-				dir+="\\";
-		String[] runString = new String[]{dir+"svm_learn.exe","-t", "5","-j","2","-W","A", dir+learning_file,  dir+model_file};
-		runEXE(runString, dir);
-	}
-	
-	
-	//svm_classify example_file model_file predictions_file
-	public void runClassifier(String dir, String example_file, String  model_file, String predictions_file)
-	{
-		if (!dir.endsWith("/"))
-				dir+="/";
-		String[] runString = new String[]{dir+"svm_classify", dir+example_file,  dir+model_file, dir+predictions_file};
-		runEXE(runString, dir);
-	}
-	public void runClassifierWin(String dir, String example_file, String  model_file, String predictions_file)
-	{
-		dir = dir.replace('/', '\\');
-		
-		if (!dir.endsWith("\\"))
-				dir+="\\";
-		String[] runString = new String[]{dir+"svm_classify.exe", dir+example_file,  dir+model_file, dir+predictions_file};
-		runEXE(runString, dir);
-	}
-
-	class StreamLogger extends Thread{
-
-		private InputStream mInputStream;
-
-		public StreamLogger(InputStream is) {
-			this.mInputStream = is;
-		}
-
-		public void run() {
-			try {
-				InputStreamReader isr = new InputStreamReader(mInputStream);
-				BufferedReader br = new BufferedReader(isr);
-				String line = null;
-				while ((line = br.readLine()) != null) {
-					System.out.println(line);
-				}
-			} catch (IOException ioe) {
-				ioe.printStackTrace();
-			}
-		}
-
-	}
-	
-	public static void main(String[] args){
-		TreeKernelRunner runner = new TreeKernelRunner();
-		runner.runLearner("C:\\stanford-corenlp\\tree_kernel\\", "training.txt", "arg0.model1.txt");
-		runner.runClassifier("C:\\stanford-corenlp\\tree_kernel\\", "arg0.test", "arg0.model1.txt", "arg0.output1.txt");
-	}
-}
-
-	/*
-exec:
-
-public Process exec(String command, String envp[], File dir) 
-
-
-
-   @param      command   a specified system command.
-   @param      envp      array of strings, each element of which 
-                         has environment variable settings in format
-                         <i>name</i>=<i>value</i>.
-   @param      dir       the working directory of the subprocess, or
-                         <tt>null</tt> if the subprocess should inherit
-                         the working directory of the current process.
-
-                         В дистрибутиве два exe-файла: svm_learn.exe и svm_classify.exe.
-
-1.   svm_learn.exe берет файл с примерами, обрабатывает его, строит файл model м правилами обучение.
-
-Примеры запуска: 
-svm_learn -t 5 learning_file model_file - это самый простой вариант запуска, SubSetTreeKernel (допускаются разрывы при обходе деревьев)
-
-svm_learn -t 5 -D 0 learning_file model_file - другой вариант ядра, SubTreeKernel
-
-Пример файла лежит на его страничке. Там же описание параметров.
-
-2. svm_classify.exe берет файл с тестовыми примерами, файл с моделью, построенный svm_learn, и записывает результаты обучения в файл predictions_file.
-
-Запуск:     svm_classify example_file model_file predictions_file
-
-Файл имеет тот же формат, что и входные примеры. Образец лежит в архиве на страничке Москитти. 
-Можно сразу же указывать, к какому классу относится пример (1 или -1 в начале строки). В этом случае точность и полнота оцениваются автоматически. Или ставить там 0.
-	 */
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/style_classif/TSNE_ImporterProcessor.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/style_classif/TSNE_ImporterProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/style_classif/TSNE_ImporterProcessor.java
deleted file mode 100644
index ef00e94..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/style_classif/TSNE_ImporterProcessor.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.kernel_interface.style_classif;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.util.HashMap;
-import java.util.Map;
-
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-import opennlp.tools.parse_thicket.kernel_interface.TreeKernelBasedClassifierMultiplePara;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang.StringUtils;
-
-public class TSNE_ImporterProcessor {
-	private static String importFilePath = "all-tsne2.txt";
-	public String resourceWorkDir = new File(".").getAbsolutePath().replace("/.", "") + 
-			"/src/test/resources/style_recognizer/";
-
-	public void importFileCreatClassifDirs() {
-		Map<Integer, String> id_Text = new HashMap<Integer, String>();
-		Map<Integer, String> id_Label = new HashMap<Integer, String>();
-
-		try {
-			FileUtils.cleanDirectory(new File(resourceWorkDir+"/txt"));
-		} catch (IOException e2) {
-			e2.printStackTrace();
-		}
-
-		String text = null;
-		try {
-			text = FileUtils.readFileToString(new File(resourceWorkDir+importFilePath ), Charset.defaultCharset().toString());
-		} catch (IOException e) {
-
-			e.printStackTrace();
-		}
-
-		String[] portions = StringUtils.substringsBetween(text, "<text ", "/text>");
-		for(int i=0; i<portions.length; i++){
-			String label = StringUtils.substringBetween(portions[i], "id=\"", "\">");
-			String po =  StringUtils.substringBetween(portions[i],  "\">", "<");
-			id_Text.put(i, po);
-			id_Label.put(i, label);
-			if (true){
-				String localDirName = label.substring(0, 4);
-				if (!new File(resourceWorkDir+"txt/"+localDirName).exists())
-					try {
-						FileUtils.forceMkdir(new File(resourceWorkDir+"txt/"+localDirName));
-					} catch (IOException e1) {
-						e1.printStackTrace();
-					}
-				try {
-					label = label.replace('/', '_');
-					String fullPath = resourceWorkDir+"txt/"+localDirName+"/"+i+label+".txt";
-					FileUtils.writeStringToFile(new File(fullPath), po);
-				} catch (IOException e) {
-					e.printStackTrace();
-				}
-			}
-		}
-
-	}
-
-	public static void main(String[] args){
-		TSNE_ImporterProcessor thisProc = new TSNE_ImporterProcessor();
-		thisProc.importFileCreatClassifDirs();
-
-		VerbNetProcessor p = VerbNetProcessor.
-				getInstance("/Users/borisgalitsky/Documents/workspace/deepContentInspection/src/test/resources"); 
-
-		TreeKernelBasedClassifierMultiplePara proc = new TreeKernelBasedClassifierMultiplePara();
-		proc.setKernelPath("/Users/borisgalitsky/Documents/tree_kernel/");
-		proc.trainClassifier(thisProc.resourceWorkDir+"/txt/Tele", 
-				thisProc.resourceWorkDir+"/txt/Tels");
-		//www.sciencedirect.com/science/article/pii/S095070511300138X
-	}
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/FrameQueryBasedIExtractor.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/FrameQueryBasedIExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/FrameQueryBasedIExtractor.java
deleted file mode 100644
index f75c0b1..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/FrameQueryBasedIExtractor.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.matching;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-import opennlp.tools.textsimilarity.TextProcessor;
-
-public class FrameQueryBasedIExtractor {
-	List<GeneralizationResult> templates = new ArrayList<GeneralizationResult>();
-	Matcher matcher = Matcher.getInstance();
-
-
-
-	private void init() {
-		templates.clear();
-
-	}
-	public void buildPTTemplates(String[] smpls){
-
-		GeneralizationResult templateCurr = matcher.assessRelevanceG(smpls[0], smpls[1]);
-		for(int i=2; i<smpls.length; i++){
-
-			templateCurr = matcher.assessRelevanceG(templateCurr, smpls[i]);
-		}
-
-		templates.add(templateCurr);
-		System.out.println("template = "+ templateCurr);
-
-	}
-
-	public void buildTemplates(String[] samples){
-		for(String setOfSamples : samples){
-			List<String> smpls = TextProcessor.splitToSentences(setOfSamples);
-			if (smpls.size()<2)
-				continue;
-			
-			GeneralizationResult templateCurr = matcher.assessRelevanceG(smpls.get(0), smpls.get(1));
-			for(int i=2; i<smpls.size(); i++){
-
-				templateCurr = matcher.assessRelevanceG(templateCurr, smpls.get(i));
-			}
-
-			templates.add(templateCurr);
-			System.out.println("template = "+ templateCurr+ "\n");
-		}
-	}
-	
-	public void buildTemplatesPairWise(String[] samples){
-		for(String setOfSamples : samples){
-			List<String> smpls = TextProcessor.splitToSentences(setOfSamples);
-
-			GeneralizationResult templateCurr = null;
-			for(int i=0; i<smpls.size(); i++)
-				for(int j=i+1; j< smpls.size(); j++){
-					templateCurr = matcher.assessRelevanceG(smpls.get(i), smpls.get(j));
-					templates.add(templateCurr);
-					System.out.println("template = "+ templateCurr+ "\n");
-			}
-		}
-	}
-
-	List<GeneralizationResult>  doIE(String text){
-		List<GeneralizationResult> fires = new ArrayList<GeneralizationResult>();
-
-		List<String> sentences = TextProcessor.splitToSentences(text);{
-			for(String sent: sentences){
-				for(GeneralizationResult t: templates){
-					GeneralizationResult res = matcher.assessRelevanceG(t.getGen(), sent);
-					boolean fire = matcher.isCoveredByTemplate(t.getGen(), res.getGen());
-					System.out.println(res+ " => "+ fire + "\n");
-					if (fire){
-						res.setIfFire(fire);
-						res.setText(sent);
-						fires.add(res);
-						System.out.println("=====================\n TEMPLATE FIRED: "+ sent + "\n====================\n");
-					}
-				}
-			}
-
-		}
-		return fires;
-	}
-
-	List<GeneralizationResult>  doIEforPT(String text){
-		List<GeneralizationResult> fires = new ArrayList<GeneralizationResult>();
-
-		for(GeneralizationResult t: templates){
-			GeneralizationResult res = matcher.assessRelevanceG(t.getGen(), text);
-			boolean fire = matcher.isCoveredByTemplate(t.getGen(), res.getGen());
-			System.out.println(res+ " =PT=> "+ fire + "\n");
-			res.setIfFire(fire);
-			res.setText(text);
-			if (fire)
-				fires.add(res);
-			
-		}
-		return fires;
-	}
-
-
-	public static void main(String[] args){
-		VerbNetProcessor.getInstance("/Users/borisgalitsky/Documents/workspace/opennlp-similarity/src/test/resources");
-		FrameQueryBasedIExtractor extractor = new FrameQueryBasedIExtractor();
-		
-		String[] texts = new String[]{"An amusement park sells adult tickets for $3 and kids tickets for $2, and got the revenue $500 yesterday.",
-						"A certified trainer conducts training for adult customers for $30 per hour and kid customer for $20 per hour, and got the revenue $1000 today."};		
-		extractor.buildPTTemplates(texts);
-		
-		 texts = new String[]{"Crossing the snow slope was dangerous. They informed in the blog that an ice axe should be used. However, I am reporting that crossing the snow field in the late afternoon I had to use crampons.",
-				"I could not cross the snow creek since it was dangerous. This was because the previous hiker reported that ice axe should be used in late afternoon.  To inform the fellow hikers, I had to use crampons going across the show field in the late afternoon ",
-		};		
-		extractor.buildPTTemplates(texts);
-		List<GeneralizationResult>  res = extractor.doIEforPT( "I had to use crampons to cross snow slopes without an ice axe in late afternoon. However in summer I do not feel it was dangerous crossing the snow.");
-
-		System.exit(0);
-
-		extractor.buildTemplates(new String[] { ""
-				+ "A junior sale engineer expert travels to customers on site. A junior design expert goes to customer companies. "
-				+ "A junior software engineer rushes to customer sites. "   
-		});
-		res = extractor.doIE( "Cisco junior sale representative expert flew to customers data centers. ");
-
-		extractor.init();
-
-		extractor.buildTemplates(new String[] { "John Doe send his California driver license 1234567. "
-				+ "Jill Paparapathi received her Ohio license 4567456"   });
-
-		res = extractor.doIE( "Mary Jones send her Canada prisoner id number 666666666. Mary Stewart hid her Mexico cook id number 666666666 . Robin Hood mentioned his UK fisher id  2345."
-				+ "Yesterday Peter Doe hid his Bolivia set id number 666666666. Robin mentioned her best Peru fisher man id  2345. Spain hid her Catalonian driver id number 666666666. John Poppins hid her  prisoner id  666666666. "
-				+ "Microsoft announced its Canada windows azure release number 666666666. John Poppins hid her Apple id  666666666");
-
-	}
-
-}


Mime
View raw message