opennlp-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bgalit...@apache.org
Subject [08/11] opennlp-sandbox git commit: removed stanford nlp refs
Date Tue, 22 Nov 2016 13:05:22 GMT
http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java
deleted file mode 100644
index 8540ff2..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.matching;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import opennlp.tools.parse_thicket.IGeneralizer;
-import opennlp.tools.parse_thicket.ParseCorefBuilderWithNER;
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-
-public class Matcher implements IGeneralizer<List<List<ParseTreeNode>>>{
-	public static String resourceDir = new File(".").getAbsolutePath().replace("/.", "") + "/src/test/resources";
-	VerbNetProcessor proc = VerbNetProcessor.getInstance(resourceDir);
-
-	protected PhraseGroupGeneralizer pgGen = new PhraseGroupGeneralizer();
-
-	protected static ParseCorefBuilderWithNER ptBuilder = null;
-	
-	static {
-		synchronized (Matcher.class) {
-			ptBuilder = ParseCorefBuilderWithNER.getInstance();
-		}
-	}
-	
-	
-	PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder();
-	protected Map<String, ParseThicket> parseThicketHash = new HashMap<String, ParseThicket>();
-
-
-	/**	   * The key function of similarity component which takes two portions of text
-	 * and does similarity assessment by finding the set of all maximum common
-	 * subtrees of the set of parse trees for each portion of text
-	 * 
-	 * @param input
-	 *          text 1
-	 * @param input
-	 *          text 2
-	 * @return the matching results structure, which includes the similarity score
-	 */
-	private static Matcher instance;
-
-	public synchronized static Matcher getInstance() {
-		if (instance == null)
-			instance = new Matcher();
-
-		return instance;
-	}
-
-
-	public List<List<ParseTreeChunk>> assessRelevance(String para1, String para2) {
-		// first build PTs for each text
-		ParseThicket pt1 = ptBuilder.buildParseThicket(para1);
-		ParseThicket pt2 = ptBuilder.buildParseThicket(para2);
-		// then build phrases and rst arcs
-		List<List<ParseTreeNode>> phrs1 = phraseBuilder.buildPT2ptPhrases(pt1);
-		List<List<ParseTreeNode>> phrs2 = phraseBuilder.buildPT2ptPhrases(pt2);
-		// group phrases by type
-		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(phrs1), 
-				sent2GrpLst = formGroupedPhrasesFromChunksForPara(phrs2);
-
-
-		List<List<ParseTreeChunk>> res = pgGen.generalize(sent1GrpLst, sent2GrpLst);
-
-		return res;
-
-	}
-
-
-	public List<List<ParseTreeChunk>> assessRelevance(List<List<ParseTreeChunk>> para0, String para2) {
-		// first build PTs for each text
-
-		ParseThicket pt2 = ptBuilder.buildParseThicket(para2);
-		// then build phrases and rst arcs
-		List<List<ParseTreeNode>> phrs2 = phraseBuilder.buildPT2ptPhrases(pt2);
-		// group phrases by type
-		List<List<ParseTreeChunk>> sent2GrpLst = formGroupedPhrasesFromChunksForPara(phrs2);
-
-
-		List<List<ParseTreeChunk>> res = pgGen.generalize(para0, sent2GrpLst);
-
-		return res;
-
-	}
-
-	public GeneralizationResult  assessRelevanceG(List<List<ParseTreeChunk>> para0, String para2) {
-		List<List<ParseTreeChunk>> res = assessRelevance( para0, para2);
-		return new GeneralizationResult(res);
-	}
-
-	public GeneralizationResult  assessRelevanceG(String para0, String para2) {
-		List<List<ParseTreeChunk>> res = assessRelevance( para0, para2);
-		return new GeneralizationResult(res);
-	}
-
-	public GeneralizationResult  assessRelevanceG(GeneralizationResult  para0, String para2) {
-		List<List<ParseTreeChunk>> res = assessRelevance( para0.getGen(), para2);
-		return new GeneralizationResult(res);
-	}
-
-	public List<List<ParseTreeChunk>> assessRelevanceCache(String para1, String para2) {
-		// first build PTs for each text
-
-		ParseThicket pt1 = parseThicketHash.get(para1);
-		if (pt1==null){
-			pt1=	ptBuilder.buildParseThicket(para1);
-			parseThicketHash.put(para1, pt1);
-		}
-
-		ParseThicket pt2 = parseThicketHash.get(para2);
-		if (pt2==null){
-			pt2=	ptBuilder.buildParseThicket(para2);
-			parseThicketHash.put(para2, pt2);
-		}
-
-		// then build phrases and rst arcs
-		List<List<ParseTreeNode>> phrs1 = phraseBuilder.buildPT2ptPhrases(pt1);
-		List<List<ParseTreeNode>> phrs2 = phraseBuilder.buildPT2ptPhrases(pt2);
-		// group phrases by type
-		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(phrs1), 
-				sent2GrpLst = formGroupedPhrasesFromChunksForPara(phrs2);
-
-
-		List<List<ParseTreeChunk>> res = pgGen.generalize(sent1GrpLst, sent2GrpLst);
-		return res;
-
-	}
-
-	public List<List<ParseTreeChunk>> generalize(List<List<ParseTreeNode>> phrs1,
-			List<List<ParseTreeNode>> phrs2) {
-		// group phrases by type
-		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(phrs1), 
-				sent2GrpLst = formGroupedPhrasesFromChunksForPara(phrs2);
-
-
-		List<List<ParseTreeChunk>> res = pgGen.generalize(sent1GrpLst, sent2GrpLst);
-		return res;
-	}
-	protected List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(
-			List<List<ParseTreeNode>> phrs) {
-		List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();
-		List<ParseTreeChunk> nps = new ArrayList<ParseTreeChunk>(), vps = new ArrayList<ParseTreeChunk>(), 
-				pps = new ArrayList<ParseTreeChunk>();
-		for(List<ParseTreeNode> ps:phrs){
-			ParseTreeChunk ch = new ParseTreeChunk(ps);
-			String ptype = ps.get(0).getPhraseType();
-			if (ptype.equals("NP")){
-				nps.add(ch);
-			} else if (ptype.equals("VP")){
-				vps.add(ch);
-			} else if (ptype.equals("PP")){
-				pps.add(ch);
-			}
-		}
-		results.add(nps); results.add(vps); results.add(pps);
-		return results;
-	}
-
-	private ParseTreeChunk convertNodeListIntoChunk(List<ParseTreeNode> ps) {
-		List<String> lemmas = new ArrayList<String>(),  poss = new ArrayList<String>();
-		for(ParseTreeNode n: ps){
-			lemmas.add(n.getWord());
-			poss.add(n.getPos());
-		}
-		ParseTreeChunk ch = new ParseTreeChunk(lemmas, poss, 0, 0);
-		ch.setMainPOS(ps.get(0).getPhraseType());
-		ch.setParseTreeNodes(ps);
-		return ch;
-	}
-
-	// this function is the main entry point into the PT builder if rst arcs are required
-	public ParseThicket buildParseThicketFromTextWithRST(String para){
-		ParseThicket pt = ptBuilder.buildParseThicket(para);
-		List<List<ParseTreeNode>> phrs = phraseBuilder.buildPT2ptPhrases(pt);
-		pt.setPhrases(phrs);
-		return pt;	
-	}
-
-	// verify that all sections (NP, PRP and VP are present
-	public boolean isCoveredByTemplate(List<List<ParseTreeChunk>> template, List<List<ParseTreeChunk>> sampleGen){
-		try {
-			if (template.size() == sampleGen.size() && sampleGen.get(0).size()>0  &&  sampleGen.get(1).size()>0  )
-				//template.get(0).get(0).getParseTreeNodes().size() == template.get(0).get(0).size())
-				return true;
-		} catch (Exception e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-
-		return false;
-	}
-
-	@Override
-	public List<List<List<ParseTreeNode>>> generalize(Object o1, Object o2) {
-		// TODO Auto-generated method stub
-		return null;
-	}
-
-
-	public static void main(String[] args){
-		Matcher m = new Matcher();
-
-		m.buildParseThicketFromTextWithRST("Mary Poppins got her identification 8765");
-
-		List<List<ParseTreeChunk>> template = m.assessRelevance("John Doe send his California driver license 1234567", 
-				"John Travolta send her california license 4567456"
-				//"New York hid her US social number 666-66-6666");
-				);
-
-		System.out.println(template+"\n");
-		//in		
-		List<List<ParseTreeChunk>> res = m.assessRelevance(template, "Mary Jones send her Canada prisoner id number 666666666");
-		System.out.println(res+ " => "+
-				m.isCoveredByTemplate(template, res));
-		res = m.assessRelevance(template, "Mary Stewart hid her Mexico cook id number 666666666");
-		System.out.println(res + " => "+
-				m.isCoveredByTemplate(template, res));
-		res = m.assessRelevance(template, "Robin mentioned her Peru fisher id  2345");
-		System.out.println(res+ " => "+
-				m.isCoveredByTemplate(template, res));
-		res = m.assessRelevance(template, "Yesterday Peter Doe hid his Bolivia set id number 666666666");
-		System.out.println(res + " => "+
-				m.isCoveredByTemplate(template, res));
-		res = m.assessRelevance(template, "Robin mentioned her best Peru fisher man id  2345");
-		System.out.println(res+ " => "+
-				m.isCoveredByTemplate(template, res));
-		//out		
-		res = m.assessRelevance(template, "Spain hid her Canada driver id number 666666666");
-		System.out.println(res+ " => "+
-				m.isCoveredByTemplate(template, res));
-		res = m.assessRelevance(template, "John Poppins hid her  prisoner id  666666666");
-		System.out.println(res+ " => "+
-				m.isCoveredByTemplate(template, res));
-
-		res = m.assessRelevance(template, "Microsoft announced its Windows Azure release number 666666666");
-		System.out.println(res+ " => "+
-				m.isCoveredByTemplate(template, res));
-		res = m.assessRelevance(template, "John Poppins hid her Google id  666666666");
-		System.out.println(res+ " => "+
-				m.isCoveredByTemplate(template, res));
-	}
-}
-

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/MyMatcher.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/MyMatcher.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/MyMatcher.java
deleted file mode 100644
index 97ab041..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/MyMatcher.java
+++ /dev/null
@@ -1,126 +0,0 @@
-package opennlp.tools.parse_thicket.matching;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.log4j.Logger;
-import org.deeplearning4j.berkeley.Pair;
-
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
-import opennlp.tools.textsimilarity.TextProcessor;
-
-import org.apache.log4j.Level;
-import org.apache.log4j.LogManager;
-
-import edu.stanford.nlp.util.StringUtils;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.util.*;
-
-/**
- * Created by sanviswa on 10/29/16.
- */
-public class MyMatcher {
-
-    Matcher m = new Matcher();
-
-
-    public static void main(String[] args) throws Exception
-    {
-
-        MyMatcher myMatcher = new MyMatcher();
-        myMatcher.runTest(myMatcher.readFile());
-    }
-
-    public List<String> readFile() throws Exception
-    {
-     //   BufferedReader br = new BufferedReader(new FileReader(this.getClass().getResource("/fidelity.txt").getPath()));
-        List<String> al = new ArrayList<String>();
-     /*   String line = null;
-        while ((line = br.readLine()) != null) {
-
-            al.add(line);
-        }
-        br.close(); */
-    	String content = FileUtils.readFileToString(new File("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/fidelity.txt"));
-        String[] als = content.split("\n");
-    	al = Arrays.asList(als);
-    	return al;
-    }
-
-    public void runTest(List<String> lst) throws Exception
-    {
-        System.out.println("Enter text: ");
-        Scanner scanner = new Scanner(System.in);
-        String queryStr = scanner.nextLine();
-        if("quit".equals(queryStr))
-        {
-            return;
-        }
-        else
-        {
-            checkLinguisticScores(queryStr,lst);
-            runTest(lst);
-        }
-
-    }
-
-    public void checkLinguisticScores(String q, List<String> aList) throws Exception
-    {   // convert query into list of tokens
-    	List<String> queryTokens = TextProcessor.fastTokenize(q.toLowerCase(), false);
-    	
-    	List<String> shortListedClasses = new ArrayList<String>();
-    	for (String ans: aList) {
-    		// convert answer class into the list of tokens
-    		List<String> classTokens = TextProcessor.fastTokenize(ans.toLowerCase(), false);
-    		// do intersection of tokens
-    		classTokens.retainAll(queryTokens);
-    		int tokenScore = 0;
-    		// count significant tokens / no stopwords
-    		for(String word: classTokens){
-    			if (word.length()>2 && StringUtils.isAlpha(word))
-    				tokenScore++;
-    		}
-    		if (tokenScore>1)
-    			 shortListedClasses.add(ans);
-    	}
-    	// do it again with lower thresh, if too few results
-    	if (shortListedClasses.size()<5)
-    		for (String ans: aList) {
-        		List<String> classTokens = TextProcessor.fastTokenize(ans.toLowerCase(), false);
-        		classTokens.retainAll(queryTokens);
-        		int tokenScore = 0;
-        		for(String word: classTokens){
-        			if (word.length()>2 && StringUtils.isAlpha(word))
-        				tokenScore++;
-        		}
-        		if (tokenScore>=1)
-        			 shortListedClasses.add(ans);
-        	}
-    	// if no overlap give up of do the full list 
-    	if (shortListedClasses.isEmpty())
-    		shortListedClasses = aList;
-    		
-        ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
-
-        ArrayList<Pair<String,Double>> pairList = new ArrayList<Pair<String,Double>>();
-
-        for (String ans: shortListedClasses) {
-
-            List<List<ParseTreeChunk>> res = m.assessRelevanceCache(q, ans);
-            double score1 = parseTreeChunkListScorer.getParseTreeChunkListScoreAggregPhraseType(res);
-            Pair<String,Double> p = new Pair<String, Double>(ans, score1);
-            pairList.add(p);
-        }
-
-        Collections.sort(pairList, Comparator.comparing(p -> p.getSecond()));
-
-      System.out.println("***** '" + q + "' ******* falls into the following categories: ");
-        for (Pair<String, Double> score: pairList) {
-            System.out.println("        " + score.getFirst() + ": " + score.getSecond());
-        }
-
-
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java
deleted file mode 100644
index 5f07593..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java
+++ /dev/null
@@ -1,477 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.matching;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Logger;
-
-import opennlp.tools.parse_thicket.ParseThicket;
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
-import opennlp.tools.parse_thicket.rhetoric_structure.RhetoricStructureArcsBuilder;
-import edu.stanford.nlp.trees.Tree;
-
-public class PT2ThicketPhraseBuilder {
-
-	RhetoricStructureArcsBuilder rstBuilder = new RhetoricStructureArcsBuilder();
-	private static Logger log = Logger
-		      .getLogger("opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder");
-
-	/*
-	 * Building phrases takes a Parse Thicket and forms phrases for each sentence individually
-	 * Then based on built phrases and obtained arcs, it builds arcs for RST
-	 * Finally, based on all formed arcs, it extends phrases with thicket phrases
-	 */
-
-	public List<List<ParseTreeNode>> buildPT2ptPhrases(ParseThicket pt ) {
-		List<List<ParseTreeNode>> phrasesAllSent = new ArrayList<List<ParseTreeNode>> ();
-		if (pt ==null) // parsing failed, return empty
-			return phrasesAllSent;
-		Map<Integer, List<List<ParseTreeNode>>> sentNumPhrases = new HashMap<Integer, List<List<ParseTreeNode>>>();
-		// build regular phrases
-		for(int nSent=0; nSent<pt.getSentences().size(); nSent++){
-			List<ParseTreeNode> sentence = pt.getNodesThicket().get(nSent);
-			Tree ptree = pt.getSentences().get(nSent);
-			//ptree.pennPrint();
-			List<List<ParseTreeNode>> phrases = buildPT2ptPhrasesForASentence(ptree, sentence);
-			log.info(phrases.toString());
-			phrasesAllSent.addAll(phrases);
-			sentNumPhrases.put(nSent, phrases);
-
-		}
-
-		// discover and add RST arcs
-		List<WordWordInterSentenceRelationArc> arcsRST =
-				rstBuilder.buildRSTArcsFromMarkersAndCorefs(pt.getArcs(), sentNumPhrases, pt);
-
-		List<WordWordInterSentenceRelationArc> arcs = pt.getArcs();
-		arcs.addAll(arcsRST);
-		pt.setArcs(arcs);
-		
-		if (pt.getArcs().size()>20){
-			log.info(pt.toString());
-		}
-
-		List<List<ParseTreeNode>> expandedPhrases = expandTowardsThicketPhrases(phrasesAllSent, pt.getArcs(), sentNumPhrases, pt);
-		return expandedPhrases;
-	}
-
-	/* Take all phrases, all arcs and merge phrases into Thicket phrases.
-	 * Then add the set of generalized (Thicket) phrases to the input set of phrases
-	 * phrasesAllSent - list of lists of phrases for each sentence
-	 * sentNumPhrase - map , gives for each sentence id, the above list
-	 * arcs - arcs formed so far
-	 * pt - the built Parse Thicket
-	 */
-	protected List<List<ParseTreeNode>> expandTowardsThicketPhrases(
-			List<List<ParseTreeNode>> phrasesAllSent,
-			List<WordWordInterSentenceRelationArc> arcs,
-			Map<Integer, List<List<ParseTreeNode>>> sentNumPhrases, 
-			ParseThicket pt ) {
-		List<List<ParseTreeNode>> thicketPhrasesAllSent = new ArrayList<List<ParseTreeNode>>();
-
-
-		for(int nSent=0; nSent<pt.getSentences().size(); nSent++){
-			for(int mSent=nSent+1; mSent<pt.getSentences().size(); mSent++){
-				// for given arc, find phrases connected by this arc and add to the list of phrases
-				for(WordWordInterSentenceRelationArc arc: arcs){
-					List<List<ParseTreeNode>> phrasesFrom = sentNumPhrases.get(nSent);
-					List<List<ParseTreeNode>> phrasesTo = sentNumPhrases.get(mSent);
-					int fromIndex = arc.getCodeFrom().getFirst();
-					int toIndex = arc.getCodeTo().getFirst();
-					if (nSent==fromIndex && mSent==toIndex){
-						int sentPosFrom = arc.getCodeFrom().getSecond();
-						int sentPosTo = arc.getCodeTo().getSecond();
-						// for the given arc arc, find phrases which are connected by it
-						List<ParseTreeNode> lFromFound = null, lToFound = null;
-						for(List<ParseTreeNode> lFrom: phrasesFrom){
-							if (lToFound!=null)
-								break;
-							for(ParseTreeNode lFromP: lFrom){
-								if (lFromP.getId()!=null &&  lFromP.getId()==sentPosFrom){
-									lFromFound = lFrom;
-									break;
-								}
-							}
-						}
-						for(List<ParseTreeNode> lTo: phrasesTo){
-							if (lToFound!=null)
-								break;
-							for(ParseTreeNode lToP: lTo)
-								if (lToP.getId()!=null && lToP.getId()==sentPosTo){
-									lToFound = lTo;
-									break;
-								}
-						}
-						// obtain a thicket phrase and add it to the list
-						if (lFromFound!=null && lToFound!=null){
-
-							if (identicalSubPhrase(lFromFound, lToFound))
-								continue;
-							List<ParseTreeNode> appended = append(lFromFound, lToFound);
-							if (thicketPhrasesAllSent.contains(appended))
-								continue;
-							log.info("rel: "+arc);
-							log.info("From "+lFromFound);
-							System.out.println("TO "+lToFound);
-							thicketPhrasesAllSent.add(append(lFromFound, lToFound));	
-							//break;
-						}
-					}
-
-				}
-			}
-		}
-		phrasesAllSent.addAll(thicketPhrasesAllSent);
-		return phrasesAllSent;
-	}
-
-	/* check that one phrase is subphrase of another by lemma (ignoring other node properties)
-	 * returns true if not found different word
-	 */
-
-	private boolean identicalSubPhrase(List<ParseTreeNode> lFromFound,
-			List<ParseTreeNode> lToFound) {
-		for(int pos=0; pos<lFromFound.size()&& pos<lToFound.size(); pos++){
-			if (!lFromFound.get(pos).getWord().equals(lToFound.get(pos).getWord()))
-				return false;
-		}
-		return true;
-	}
-
-	private List<ParseTreeNode> append(List<ParseTreeNode> lFromFound,
-			List<ParseTreeNode> lToFound) {
-		List<ParseTreeNode> appendList = new ArrayList<ParseTreeNode>();
-		if (lFromFound.get(0).getPhraseType().equals(lToFound.get(0).getPhraseType())){
-			appendList.addAll(lFromFound);
-			appendList.addAll(lToFound);
-		} else {
-			String pType = lFromFound.get(0).getPhraseType();
-			appendList.addAll(lFromFound);
-			for(ParseTreeNode p: lToFound){
-				p.setPhraseType(pType);
-				appendList.add(p);
-			}
-		}
-		return appendList;
-	}
-
-
-	public List<List<ParseTreeNode>> buildPT2ptPhrasesForASentence(Tree tree, List<ParseTreeNode> sentence ) {
-		List<List<ParseTreeNode>> phrases;
-
-		phrases = new ArrayList<List<ParseTreeNode>>();		
-		navigateR(tree, sentence, phrases);
-
-		return phrases;
-	}
-
-
-
-
-	/*
-	 * 
-[[<1>NP'Iran':NNP], [<2>VP'refuses':VBZ, <3>VP'to':TO, <4>VP'accept':VB, <5>VP'the':DT, <6>VP'UN':NNP, 
-<7>VP'proposal':NN, <8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, <12>VP'over':IN, <13>VP'its':PRP$,
- <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], [<3>VP'to':TO, <4>VP'accept':VB, <5>VP'the':DT,
-  <6>VP'UN':NNP, <7>VP'proposal':NN, <8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, <12>VP'over':IN, 
-  <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], [<4>VP'accept':VB, 
-  <5>VP'the':DT, <6>VP'UN':NNP, <7>VP'proposal':NN, <8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, 
-  <12>VP'over':IN, <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], 
-  [<5>NP'the':DT, <6>NP'UN':NNP, <7>NP'proposal':NN], [<8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, 
-  <12>VP'over':IN, <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], 
-  [<9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, <12>VP'over':IN, <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN,
-   <16>VP'nuclear':JJ, <17>VP'weapons':NNS], [<10>NP'its':PRP$, <11>NP'dispute':NN], [<12>PP'over':IN, <13>PP'its':PRP$, 
-   <14>PP'work':NN, <15>PP'on':IN, <16>PP'nuclear':JJ, <17>PP'weapons':NNS], [<13>NP'its':PRP$, <14>NP'work':NN, 
-   <15>NP'on':IN, <16>NP'nuclear':JJ, <17>NP'weapons':NNS], [<13>NP'its':PRP$, <14>NP'work':NN],
- [<15>PP'on':IN, <16>PP'nuclear':JJ, <17>PP'weapons':NNS], [<16>NP'nuclear':JJ, <17>NP'weapons':NNS]]
-	 *  
-	 * 
-	 */
-	private void navigateR(Tree t, List<ParseTreeNode> sentence,
-			List<List<ParseTreeNode>> phrases) {
-		if (!t.isPreTerminal()) {
-			if (t.label() != null) {
-				if (t.value() != null) {
-					// if ROOT or S, returns empty
-					List<ParseTreeNode> nodes = parsePhrase(t.label().value(), t.toString());
-					nodes = assignIndexToNodes(nodes, sentence);
-					if (!nodes.isEmpty())
-						phrases.add(nodes);
-					if (nodes.size()>0 && nodes.get(0).getId()==null){
-						if (nodes.size()>1 && nodes.get(1)!=null && nodes.get(1).getId()!=null){
-							try {
-								ParseTreeNode n = nodes.get(0);
-								n.setId(nodes.get(1).getId()-1);
-								nodes.set(0, n);
-							} catch (Exception e) {
-								e.printStackTrace();
-							}
-						} else {
-							log.severe("Failed alignment:"+nodes);
-						}
-					}
-				}
-			}
-			Tree[] kids = t.children();
-			if (kids != null) {
-				for (Tree kid : kids) {
-					navigateR(kid,sentence,  phrases);
-				}
-			}
-			return ;
-		}
-	}
-
-
-	/* alignment of phrases extracted from tree against the sentence as a list of lemma-pos */
-
-	private List<ParseTreeNode> assignIndexToNodes(List<ParseTreeNode> node,
-			List<ParseTreeNode> sentence) {
-		if (sentence==null || sentence.size()<1)
-			return node;
-
-		List<ParseTreeNode> results = new ArrayList<ParseTreeNode>();
-
-		for(int i= 0; i<node.size(); i++){
-			String thisLemma = node.get(i).getWord();			
-			String thisPOS = node.get(i).getPos();
-			String nextLemma = null, nextPOS = null;
-
-			if (i+1<node.size()){
-				nextLemma = node.get(i+1).getWord();
-				nextPOS = node.get(i+1).getPos();
-			}
-			Boolean matchOccurred = false;
-			int j = 0;
-			for(j= 0; j<sentence.size(); j++){
-				if (!(sentence.get(j).getWord().equals(thisLemma) && (sentence.get(j).getPos().equals(thisPOS))))
-					continue;
-				if (i+1<node.size() && j+1 < sentence.size() && nextLemma!=null 
-						&& ! (sentence.get(j+1).getWord().equals(nextLemma)
-								&& sentence.get(j+1).getPos().equals(nextPOS)))
-					continue;
-				matchOccurred = true;
-				break;
-			}
-
-			ParseTreeNode n = node.get(i);
-			if (matchOccurred){
-				n.setId(sentence.get(j).getId());
-				n.setNe(sentence.get(j).getNe());
-				n.setAttributes(sentence.get(j).getAttributes());
-			}
-			results.add(n);
-		}
-
-		try {
-			if (results!=null && results.size()>1 && results.get(0)!=null && results.get(0).getId()!=null &&
-					results.get(1) !=null && results.get(1).getId()!=null &&  results.get(1).getId()>0){
-				ParseTreeNode p = results.get(0);
-				p.setId(results.get(1).getId()-1);
-				results.set(0, p);
-			}
-		} catch (Exception e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-		return results;
-	}
-
-
-	/*
-	 * [[NP'':], ['(NNP':Iran)], [VP'':], ['(VBZ':refuses)], [VP'':], ['(TO':to)], [VP'':], ['(VB':accept)], [NP'':], 
-	 * ['(DT':the)], ['(NNP':UN)], ['(NN':proposal)], [VP'':], ['(TO':to)], [VP'':], ['(VB':end)], [NP'':], 
-	 * ['(PRP$':its)], ['(NN':dispute)], [PP'':], ['(IN':over)], [NP'':], [NP'':],
-	 *  ['(PRP$':its)], ['(NN':work)], [PP'':], ['(IN':on)], [NP'':], ['(JJ':nuclear)], ['(NNS':weapons)], ['(.':.)]]
-	 * 
-	 * [[NP'':], ['(NNP':Iran)],
- [VP'':], ['(VBZ':refuses)], 
- [VP'':], ['(TO':to)], 
- [VP'':], ['(VB':accept)], 
-    [NP'':], ['(DT':the)], ['(NNP':UN)], ['(NN':proposal)], 
-    [VP'':], ['(TO':to)], [VP'':], ['(VB':end)], 
-    [NP'':], ['(PRP$':its)], ['(NN':dispute)], 
-        [PP'':], ['(IN':over)], 
-            [NP'':], [NP'':], ['(PRP$':its)], ['(NN':work)], 
-              [PP'':], ['(IN':on)], 
-                [NP'':], ['(JJ':nuclear)], ['(NNS':weapons)], 
-['(.':.)]]
-	 */
-	private void navigateR1(Tree t, List<ParseTreeNode> sentence, int l,
-			List<List<ParseTreeNode>> phrases) {
-		if (t.isPreTerminal()) {
-			if (t.label() != null) {
-				List<ParseTreeNode> node = parsePhrase(t.toString());	
-				if (!node.isEmpty())
-					phrases.add(node);
-			}
-			return;
-		} else {
-			if (t.label() != null) {
-				if (t.value() != null) {
-					List<ParseTreeNode> node = parsePhrase(t.label().value());		 
-					if (!node.isEmpty())
-						phrases.add(node);
-				}
-			}
-			Tree[] kids = t.children();
-			if (kids != null) {
-				for (Tree kid : kids) {
-					navigateR1(kid,sentence,  l, phrases);
-				}
-			}
-			return ;
-		}
-	}
-
-
-	protected List<ParseTreeNode> parsePhrase(String value) {
-		List<ParseTreeNode> nlist = new ArrayList<ParseTreeNode>(); 
-		if (value==null)
-			return nlist;
-		if (value.equals("ROOT")|| value.equals("S")) 
-			return nlist;
-
-		String[] pos_value = value.split(" ");
-		ParseTreeNode node = null;
-		if (value.endsWith("P")){
-			node = new ParseTreeNode("", ""); 
-			node.setPhraseType(value);
-		} else 
-			if (pos_value != null && pos_value.length==2){
-				node = new ParseTreeNode(pos_value[0], pos_value[1]);
-			} else {
-				node = new ParseTreeNode(value, "");
-			}
-
-		nlist.add(node);
-		return nlist;
-	}
-
-	private ParseTreeNode parsePhraseNode(String value) {
-
-		if (value.equals("ROOT")|| value.equals("S")) 
-			return null;
-
-		String[] pos_value = value.split(" ");
-		ParseTreeNode node = null;
-		if (value.endsWith("P")){
-			node = new ParseTreeNode("", ""); 
-			node.setPhraseType(value);
-		} else 
-			if (pos_value != null && pos_value.length==2){
-				node = new ParseTreeNode(pos_value[0], pos_value[1]);
-			} else {
-				node = new ParseTreeNode(value, "");
-			}			
-
-		return node;
-	}
-
-	public List<ParseTreeNode> parsePhrase(String value, String fullDump) {
-
-		List<ParseTreeNode> nlist = new ArrayList<ParseTreeNode>(); 
-		if (value.equals("S")|| value.equals("ROOT"))
-			return nlist;
-		// first phrase type normalization
-		fullDump = fullDump.replace("NP-TMP", "NP");
-		
-		String flattened = fullDump.replace("(ROOT","").replace("(NP","").replace("(VP","").replace("(PP","")
-				.replace("(ADVP","").replace("(UCP","").replace("(ADJP","").replace("(SBAR","").
-				replace("(PRT", "").replace("(WHNP","").
-				replace("))))",")").replace(")))",")").replace("))",")")
-				.replace("   ", " ").replace("  ", " ").replace("(S","")
-				.replace(") (","#").replace(")  (", "#");
-		String[] flattenedArr =  flattened.split("#");
-		for(String term: flattenedArr){
-			term = term.replace('(', ' ').replace(')',' ').trim();
-			if (term!=null && term.split(" ")!=null && term.split(" ").length==2){
-				ParseTreeNode node = new ParseTreeNode(term.split(" ")[1],term.split(" ")[0] );
-				node.setPhraseType(value);
-				nlist.add(node);
-			}
-		}
-		return nlist;
-	}
-
-	/* recursion example */
-
-	private StringBuilder toStringBuilder(StringBuilder sb, Tree t) {
-		if (t.isLeaf()) {
-			if (t.label() != null) {
-				sb.append(t.label().value());
-			}
-			return sb;
-		} else {
-			sb.append('(');
-			if (t.label() != null) {
-				if (t.value() != null) {
-					sb.append(t.label().value());
-				}
-			}
-			Tree[] kids = t.children();
-			if (kids != null) {
-				for (Tree kid : kids) {
-					sb.append(' ');
-					toStringBuilder(sb, kid);
-				}
-			}
-			return sb.append(')');
-		}
-	}
-
-	public static void main(String[] args){
-		Matcher matcher = new Matcher();
-		String para = 
-				"Last Wednesday, world powers reached agreement with Iran on limiting Iranian nuclear activity in return for the lifting of sanctions. "
-		/*+
-						"The Israeli Prime Minister called the deal an historic mistake which would only make it easier for Iran to back its proxies in the Middle East. "+
-						"That position may have hardened after Iran's supreme leader Ayatollah Ali Khamenei said his country would continue its support for the people of Palestine after the deal. "+
-						"Saudi Arabia has officially said it supports the deal, although it is also thought to have similar concerns to Israel that the agreement legitimises Iran. "
-						*/
-						;
-		matcher.buildParseThicketFromTextWithRST(para);
-		
-		
-		PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder();
-		String line = "(NP (NNP Iran)) (VP (VBZ refuses) (S (VP (TO to) (VP (VB accept) (S (NP (DT the) " +
-				"(NNP UN) (NN proposal)) (VP (TO to) (VP (VB end) (NP (PRP$ its) (NN dispute))))))))";
-
-		List<ParseTreeNode> res = phraseBuilder. parsePhrase("NP", line);
-		System.out.println(res);
-
-
-		line = "(VP (VBP am) (NP (NP (DT a) (NNP US) (NN citizen)) (UCP (VP (VBG living) (ADVP (RB abroad))) (, ,) (CC and) (ADJP (JJ concerned) (PP (IN about) (NP (NP (DT the) (NN health) (NN reform) (NN regulation)) (PP (IN of) (NP (CD 2014)))))))))";
-		res = phraseBuilder. parsePhrase("VP", line);
-		System.out.println(res);
-
-		line = "(VP (TO to) (VP (VB wait) (SBAR (IN till) (S (NP (PRP I)) (VP (VBP am) (ADJP (JJ sick) (S (VP (TO to) (VP (VB buy) (NP (NN health) (NN insurance)))))))))))";
-		res = phraseBuilder. parsePhrase("VP", line);
-		System.out.println(res);
-	}
-
-}
-/*
- * The Ukrainian government, Western leaders and Nato all say there is clear evidence that Russia is helping the rebels in the eastern Donetsk and Luhansk regions with heavy weapons and soldiers. Independent experts echo that accusation.
-Moscow denies it, insisting that any Russians serving with the rebels are volunteers.
-
-*/

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PersonalInformationExtractor.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PersonalInformationExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PersonalInformationExtractor.java
deleted file mode 100644
index 5df0dee..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PersonalInformationExtractor.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.matching;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.parse_thicket.VerbNetProcessor;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.tika.Tika;
-import org.apache.tika.exception.TikaException;
-
-/**
- * Runs a {@link FrameQueryBasedIExtractor} over the text of one file or of
- * every file below a directory, after building the extractor's templates from
- * sample sentences, and collects the generalization results into a report.
- */
-public class PersonalInformationExtractor {
-	FrameQueryBasedIExtractor extractor = new FrameQueryBasedIExtractor();
-	/** Files collected by {@link #addFiles(File)} and waiting to be processed. */
-	private final List<File> queue = new ArrayList<File>();
-	private final Tika tika = new Tika();
-
-	/**
-	 * Builds the extractor templates from a fixed set of built-in sample
-	 * sentences and runs the extractor on the content of the given file.
-	 * The extraction result is not returned or stored.
-	 *
-	 * @param filename path of the text file to analyze
-	 */
-	public void runExtractor(String filename){
-		String content = null;
-		try {
-			content = FileUtils.readFileToString(new File(filename));
-		} catch (IOException e) {
-			e.printStackTrace();
-		}
-
-		extractor.buildTemplates(new String[] { "John Doe send his California driver license 1234567 . "
-				+ "Jill Jones received her Ohio license 4567456. ", 
-				" Mary Poppins got her identification 8765. Jorge Malony sold his identification 9876. ",
-				//" President Jorge Smith of Microsoft used his id 4567. Manager John Smith of Google used his id 8765. "
-				" Johh Doe 123. Don Joe 1323. "
-
-		});
-
-		if (content == null) {
-			// The file could not be read (already reported above): nothing to analyze.
-			return;
-		}
-		extractor.doIE(content);
-	}
-
-	/**
-	 * Recursively collects all regular files at or below {@code file} into
-	 * the processing queue. Paths that do not exist, or directories that
-	 * cannot be listed, are reported on standard output and skipped.
-	 *
-	 * @param file a file or directory to collect from
-	 */
-	private void addFiles(File file) {
-		try {
-			if (!file.exists()) {
-				System.out.println(file + " does not exist.");
-				// Do not queue a path that is not there.
-				return;
-			}
-			if (!file.isDirectory()) {
-				queue.add(file);
-				return;
-			}
-			File[] children = file.listFiles();
-			if (children == null) {
-				// listFiles() returns null when the directory cannot be read.
-				System.out.println(file + " cannot be listed.");
-				return;
-			}
-			for (File f : children) {
-				addFiles(f);
-			}
-		} catch (SecurityException e) {
-			// Collection is best-effort: report the inaccessible path and carry on.
-			System.out.println(file + " is not accessible: " + e.getMessage());
-		}
-	}
-
-	/**
-	 * Builds the extractor templates from the samples stored in
-	 * {@code template} (samples are separated by {@code '&'}), then runs the
-	 * extractor on every file at or below {@code filename} and writes the
-	 * results to {@code PII_report.csv}.
-	 *
-	 * @param filename file or directory to analyze
-	 * @param template path of the file holding the '&'-separated samples
-	 * @throws IOException if the template file cannot be read
-	 */
-	public void processDirectory(String filename, String template) throws IOException {
-		List<String[]> report = new ArrayList<String[]>(); 
-		report.add(new String[]{"filename", "text",  "generalization", "fired?" });
-
-		// Without templates nothing can be extracted, so a read failure is
-		// propagated to the caller instead of failing later with an NPE.
-		String templateStr = FileUtils.readFileToString(new File(template));
-		String[] samples = templateStr.split("&");
-
-		extractor.buildTemplates(samples);
-
-		addFiles(new File(filename));
-
-		try {
-			for (File f : queue) {
-				try {
-					String content = tika.parseToString(f);
-					List<GeneralizationResult>  res = extractor.doIE( content);
-
-					for(GeneralizationResult gr: res){
-						// Record the file that produced the result, not the directory argument.
-						report.add(new String[]{f.getAbsolutePath(), gr.getText(),  gr.getGen().toString(), gr.getbFire().toString() });
-					}
-
-				} catch (TikaException e) {
-					System.out.println("Tika problem with file" + f.getAbsolutePath());
-				} catch (Exception ee){
-					ee.printStackTrace();
-				}
-				// Rewritten after every file so partial results survive an aborted run.
-				ProfileReaderWriter.writeReport(report, "PII_report.csv");
-			}
-		} finally {
-			// Leave no stale entries behind for the next call, even on failure.
-			queue.clear();
-		}
-	}
-
-	/**
-	 * Builds the extractor templates from the '&'-separated samples stored in
-	 * {@code template} and runs the extractor on the content of
-	 * {@code filename}.
-	 *
-	 * NOTE(review): the report assembled here is never written or returned;
-	 * confirm whether it should be persisted like in
-	 * {@link #processDirectory(String, String)}.
-	 *
-	 * @param filename path of the text file to analyze
-	 * @param template path of the file holding the '&'-separated samples
-	 */
-	public void runExtractor(String filename, String template){
-		String content, templateStr;
-		try {
-			content = FileUtils.readFileToString(new File(filename));
-			templateStr =  FileUtils.readFileToString(new File(template));
-		} catch (IOException e) {
-			// Either input is missing: report it and stop instead of failing with an NPE below.
-			e.printStackTrace();
-			return;
-		}
-
-		String[] samples = templateStr.split("&");
-
-		extractor.buildTemplates(samples);
-
-		List<GeneralizationResult>  res = extractor.doIE( content);
-		List<String[]> report = new ArrayList<String[]>();
-
-		for(GeneralizationResult gr: res){
-			report.add(new String[]{filename, gr.getText(),  gr.getGen().toString(), gr.getbFire().toString() });
-		}
-	}
-
-	/**
-	 * Command-line entry point. Expects exactly three arguments: the path to
-	 * the resources passed to {@link VerbNetProcessor#getInstance(String)}, the
-	 * file or directory to analyze, and the file with the sample sentences.
-	 */
-	public static void main(String[] args){
-		//String filename = "/Users/borisgalitsky/Documents/workspace/deepContentInspection/src/test/resources/pii/agreement.txt";
-
-		if (args ==null || args.length!=3) {
-			System.err.println("Usage: java -Xmx10g -jar *.jar path-to-resources path-to-file-to-analyze path-to-file-with_samples\n");
-			// Stop here: indexing args below would otherwise throw.
-			return;
-		}
-		try {
-			VerbNetProcessor.getInstance(args[0]);
-			new PersonalInformationExtractor().processDirectory( args[1], args[2]);
-		} catch (IOException e) {
-			e.printStackTrace();
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/AbstractEngineRunner.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/AbstractEngineRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/AbstractEngineRunner.java
deleted file mode 100644
index ba8a140..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/AbstractEngineRunner.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.opinion_processor;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
-
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import opennlp.tools.textsimilarity.ParseTreeChunk;
-
-/**
- * Reads a file of reviews, runs the {@link NamedEntityExtractor} on every
- * review text found in it and writes the extracted sentiment phrases to a
- * CSV report.
- */
-public class AbstractEngineRunner {
-	/** Files collected by {@link #addFiles(File)}; initialised so that collecting cannot fail with an NPE. */
-	private List<File> queue = new ArrayList<File>();
-	private final static String reviewSource = "/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/opinions/macbook_pro.txt";
-	NamedEntityExtractor neExtractor = new NamedEntityExtractor();
-
-	/**
-	 * Extracts every substring located between {@code reviewText": "} and
-	 * {@code ", "overall} in the review source file, runs entity extraction
-	 * on each of them and appends four rows per review to the report: the
-	 * text, the sentiment phrases as words, the same phrases as parse-tree
-	 * nodes, and a separator line. The report is written to
-	 * {@code nameEntitiesTopicsOfInterestExtracted.csv}.
-	 */
-	public void processJSONfileWithReviews(){
-		List<String[]> report = new ArrayList<String[]>();
-		report.add(new String[] { "text", "phrases of potential interest list" , });
-
-		String content;
-		try {
-			content = FileUtils.readFileToString(new File(reviewSource));
-		} catch (IOException e) {
-			// Nothing to process without the review file.
-			e.printStackTrace();
-			return;
-		}
-		String[] texts = StringUtils.substringsBetween(content, "reviewText\": \"", "\", \"overall");
-		if (texts == null) {
-			// substringsBetween returns null when no review text is found.
-			return;
-		}
-		for(String text: texts){
-			EntityExtractionResult result = neExtractor.extractEntities(text);
-			report.add(new String[]{text});
-			//report.add((String[])result.extractedNERWords.toArray(new String[0]));
-			//report.add((String[])result.extractedSentimentPhrases.toArray(new String[0]));
-			List<String> stringPhrases = new ArrayList<String>(),
-					nodePhrases = new ArrayList<String>();
-			for(List<ParseTreeNode> chList: result.extractedSentimentPhrases){
-				StringBuilder buf = new StringBuilder(), nodeBuf = new StringBuilder();
-				for(ParseTreeNode ch: chList){
-					buf.append(ch.getWord()).append(' ');
-					nodeBuf.append(ch.toString()).append(' ');
-				}
-				stringPhrases.add(buf.toString().trim());
-				nodePhrases.add(nodeBuf.toString().trim());
-			}
-			report.add(stringPhrases.toArray(new String[0]));
-			report.add(nodePhrases.toArray(new String[0]));
-			report.add(new String[]{"-----------------------------"});
-			// Rewritten after every review so partial results survive an aborted run.
-			ProfileReaderWriter.writeReport(report, "nameEntitiesTopicsOfInterestExtracted.csv");
-		}
-	}
-
-	/**
-	 * Recursively collects all regular files at or below {@code file} into
-	 * the queue, skipping directory entries whose name starts with a dot.
-	 * The name of every visited directory entry is printed.
-	 *
-	 * @param file a file or directory to collect from
-	 */
-	private void addFiles(File file) {
-
-		if (!file.exists()) {
-			System.out.println(file + " does not exist.");
-			// Do not queue a path that is not there.
-			return;
-		}
-		if (!file.isDirectory()) {
-			queue.add(file);
-			return;
-		}
-		File[] children = file.listFiles();
-		if (children == null) {
-			// listFiles() returns null when the directory cannot be read.
-			return;
-		}
-		for (File f : children) {
-			if (f.getName().startsWith("."))
-				continue;
-			addFiles(f);
-			System.out.println(f.getName());
-		}
-	}
-
-	/** Runs the review processing on the hard-coded review source file. */
-	public static void main(String[] args){
-		AbstractEngineRunner runner = new AbstractEngineRunner();
-		runner.processJSONfileWithReviews();
-
-	}
-}
-
-/*
-	public void processDirectory(String path){
-		List<String[]> report = new ArrayList<String[]>();
-		report.add(new String[] { "filename", "named entity list", "phrases of potential interest list" });
-
-		List<String> allNamedEntities = new ArrayList<String>();
-
-		addFiles(new File(path));
-		for(File f: queue){
-			List<String> entities = (List<String>) extractEntities(f.getAbsolutePath()).getFirst();
-			List<String> opinions = (List<String>) extractEntities(f.getAbsolutePath()).getSecond();
-			report.add(new String[]{ f.getName(), entities.toString(),  opinions.toString()});	
-			ProfileReaderWriter.writeReport(report, "nameEntitiesExtracted.csv");
-
-			allNamedEntities.addAll(entities);
-
-			allNamedEntities = new ArrayList<String>(new HashSet<String> (allNamedEntities ));
-
-
-		}
-		ProfileReaderWriter.writeReport(report, "nameEntitiesTopicsOfInterestExtracted.csv");
-	} 
-} */

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/DefaultSentimentProcessor.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/DefaultSentimentProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/DefaultSentimentProcessor.java
deleted file mode 100644
index 44a3640..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/DefaultSentimentProcessor.java
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.opinion_processor;
-
-import java.io.IOException;
-import java.util.List;
-
-import edu.stanford.nlp.util.logging.Redwood;
-
-import java.util.Iterator;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Properties;
-import java.util.logging.Logger;
-
-import org.ejml.simple.SimpleMatrix;
-
-import edu.stanford.nlp.io.IOUtils;
-import edu.stanford.nlp.ling.CoreAnnotations;
-import edu.stanford.nlp.ling.CoreLabel;
-import edu.stanford.nlp.ling.Label;
-import edu.stanford.nlp.ling.LabeledWord;
-import edu.stanford.nlp.ling.TaggedWord;
-import edu.stanford.nlp.ling.WordLemmaTag;
-import edu.stanford.nlp.ling.WordTag;
-import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
-import edu.stanford.nlp.pipeline.Annotation;
-import edu.stanford.nlp.pipeline.StanfordCoreNLP;
-import edu.stanford.nlp.sentiment.SentimentCoreAnnotations.SentimentAnnotatedTree;
-import edu.stanford.nlp.sentiment.SentimentUtils;
-import edu.stanford.nlp.trees.MemoryTreebank;
-import edu.stanford.nlp.trees.Tree;
-import edu.stanford.nlp.trees.TreeCoreAnnotations;
-import edu.stanford.nlp.util.CoreMap;
-import edu.stanford.nlp.util.Generics;
-import edu.stanford.nlp.ling.CoreAnnotations;
-import edu.stanford.nlp.pipeline.Annotation;
-import edu.stanford.nlp.pipeline.StanfordCoreNLP;
-import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
-import edu.stanford.nlp.trees.Tree;
-import edu.stanford.nlp.util.CoreMap;
-
-/**
- * Command-line style driver around the Stanford CoreNLP sentiment pipeline:
- * annotates text or trees with sentiment and prints the result in one or
- * more output formats. Also offers a helper that returns the predicted
- * sentiment class of an expression.
- */
-public class DefaultSentimentProcessor {
-	/** A logger for this class */
-	private static final Logger log = Logger
-			.getLogger("opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor");
-
-	/** Format used when printing vector and score components. */
-	private static final NumberFormat NF = new DecimalFormat("0.0000");
-
-	/** Output styles understood by {@link #outputTree}. */
-	enum Output {
-		PENNTREES, VECTORS, ROOT, PROBABILITIES
-	}
-
-	/** Input kinds understood by {@link #getAnnotations}. */
-	enum Input {
-		TEXT, TREES
-	}
-
-	/**
-	 * Sets the labels on the tree (except the leaves) to be the integer
-	 * value of the sentiment prediction.  Makes it easy to print out
-	 * with Tree.toString()
-	 *
-	 * @throws IllegalArgumentException if a non-leaf node does not carry a CoreLabel
-	 */
-	static void setSentimentLabels(Tree tree) {
-		if (tree.isLeaf()) {
-			return;
-		}
-
-		for (Tree child : tree.children()) {
-			setSentimentLabels(child);
-		}
-
-		Label label = tree.label();
-		if (!(label instanceof CoreLabel)) {
-			throw new IllegalArgumentException("Required a tree with CoreLabels");
-		}
-		CoreLabel cl = (CoreLabel) label;
-		cl.setValue(Integer.toString(RNNCoreAnnotations.getPredictedClass(tree)));
-	}
-
-	/**
-	 * Sets the labels on the tree to be the indices of the nodes.
-	 * Starts counting at the root and does a preorder traversal: a node
-	 * is numbered before its children. Leaves are not numbered.
-	 *
-	 * @param index the index to give to {@code tree} if it is not a leaf
-	 * @return the next unused index
-	 */
-	static int setIndexLabels(Tree tree, int index) {
-		if (tree.isLeaf()) {
-			return index;
-		}
-
-		tree.label().setValue(Integer.toString(index));
-		index++;
-		for (Tree child : tree.children()) {
-			index = setIndexLabels(child, index);
-		}
-		return index;
-	}
-
-	/**
-	 * Outputs the vectors from the tree.  Counts the tree nodes the
-	 * same as setIndexLabels.
-	 *
-	 * @return the next unused index
-	 */
-	static int outputTreeVectors(PrintStream out, Tree tree, int index) {
-		if (tree.isLeaf()) {
-			return index;
-		}
-
-		out.print("  " + index + ":");
-		SimpleMatrix vector = RNNCoreAnnotations.getNodeVector(tree);
-		for (int i = 0; i < vector.getNumElements(); ++i) {
-			out.print("  " + NF.format(vector.get(i)));
-		}
-		out.println();
-		index++;
-		for (Tree child : tree.children()) {
-			index = outputTreeVectors(out, child, index);
-		}
-		return index;
-	}
-
-	/**
-	 * Outputs the scores from the tree.  Counts the tree nodes the
-	 * same as setIndexLabels.
-	 *
-	 * @return the next unused index
-	 */
-	static int outputTreeScores(PrintStream out, Tree tree, int index) {
-		if (tree.isLeaf()) {
-			return index;
-		}
-
-		out.print("  " + index + ":");
-		SimpleMatrix vector = RNNCoreAnnotations.getPredictions(tree);
-		for (int i = 0; i < vector.getNumElements(); ++i) {
-			out.print("  " + NF.format(vector.get(i)));
-		}
-		out.println();
-		index++;
-		for (Tree child : tree.children()) {
-			index = outputTreeScores(out, child, index);
-		}
-		return index;
-	}
-
-	/**
-	 * Converts a single word-like object to a string, using the default
-	 * separator for types that print a word together with its tag.
-	 */
-	public static <T> String wordToString(T o, final boolean justValue) {
-		return wordToString(o, justValue, null);
-	}
-
-	/**
-	 * Converts a single word-like object to a string.
-	 *
-	 * @param o the object to print
-	 * @param justValue if {@code true} and {@code o} is a {@code Label},
-	 *                  only its {@code value()} is returned (for a
-	 *                  {@code CoreLabel} with a null value, its {@code word()})
-	 * @param separator string placed between word and tag; when {@code null}
-	 *                  a {@code CoreLabel} uses {@code CoreLabel.TAG_SEPARATOR}
-	 *                  and other types fall back to {@code toString()}
-	 */
-	public static <T> String wordToString(T o, final boolean justValue,
-			final String separator) {
-		if (justValue && o instanceof Label) {
-			if (o instanceof CoreLabel) {
-				CoreLabel l = (CoreLabel) o;
-				String w = l.value();
-				if (w == null)
-					w = l.word();
-				return w;
-			} else {
-				return (((Label) o).value());
-			}
-		} else if (o instanceof CoreLabel) {
-			CoreLabel l = ((CoreLabel) o);
-			String w = l.value();
-			if (w == null)
-				w = l.word();
-			if (l.tag() != null) {
-				if (separator == null) {
-					return w + CoreLabel.TAG_SEPARATOR + l.tag();
-				} else {
-					return w + separator + l.tag();
-				}
-			}
-			return w;
-			// an interface that covered these next four cases would be
-			// nice, but we're moving away from these data types anyway
-		} else if (separator != null && o instanceof TaggedWord) {
-			return ((TaggedWord) o).toString(separator);
-		} else if (separator != null && o instanceof LabeledWord) {
-			return ((LabeledWord) o).toString();
-		} else if (separator != null && o instanceof WordLemmaTag) {
-			return ((WordLemmaTag) o).toString(separator);
-		} else if (separator != null && o instanceof WordTag) {
-			return ((WordTag) o).toString(separator);
-		} else {
-			return (o.toString());
-		}
-	}
-
-
-	/**
-	 * Returns the sentence as a string with a space between words.
-	 * It prints out the {@code value()} of each item -
-	 * this will give the expected answer for a short form representation
-	 * of the "sentence" over a range of cases.  It is equivalent to
-	 * calling {@code listToString(list, true)}.
-	 *
-	 * TODO: Sentence used to be a subclass of ArrayList, with this
-	 * method as the toString.  Therefore, there may be instances of
-	 * ArrayList being printed that expect this method to be used.
-	 *
-	 * @param list The tokenized sentence to print out
-	 * @return The tokenized sentence as a String
-	 */
-	public static <T> String listToString(List<T> list) {
-		return listToString(list, true);
-	}
-	/**
-	 * Returns the sentence as a string with a space between words.
-	 * Designed to work robustly, even if the elements stored in the
-	 * 'Sentence' are not of type Label.
-	 *
-	 * This one uses the default separators for any word type that uses
-	 * separators, such as TaggedWord.
-	 *
-	 * @param list The tokenized sentence to print out
-	 * @param justValue If {@code true} and the elements are of type
-	 *                  {@code Label}, return just the
-	 *                  {@code value()} of the {@code Label} of each word;
-	 *                  otherwise,
-	 *                  call the {@code toString()} method on each item.
-	 * @return The sentence in String form
-	 */
-	public static <T> String listToString(List<T> list, final boolean justValue) {
-		return listToString(list, justValue, null);
-	}
-
-	/**
-	 * As already described, but if separator is not null, then objects
-	 * such as TaggedWord are printed with that separator between
-	 * word and tag.
-	 *
-	 * @param separator The string used to separate Word and Tag
-	 *                  in TaggedWord, etc
-	 */
-	public static <T> String listToString(List<T> list, final boolean justValue,
-			final String separator) {
-		StringBuilder s = new StringBuilder();
-		for (Iterator<T> wordIterator = list.iterator(); wordIterator.hasNext();) {
-			T o = wordIterator.next();
-			s.append(wordToString(o, justValue, separator));
-			if (wordIterator.hasNext()) {
-				s.append(' ');
-			}
-		}
-		return s.toString();
-	}
-
-	/**
-	 * Outputs a tree using the output style requested
-	 *
-	 * @param out where to print
-	 * @param sentence an annotated sentence holding a sentiment-annotated tree
-	 * @param outputFormats the styles to print, in order
-	 */
-	static void outputTree(PrintStream out, CoreMap sentence, List<Output> outputFormats) {
-		Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
-		for (Output output : outputFormats) {
-			switch (output) {
-			case PENNTREES: {
-				// Relabel a copy so the annotated tree itself stays untouched.
-				Tree copy = tree.deepCopy();
-				setSentimentLabels(copy);
-				out.println(copy);
-				break;
-			}
-			case VECTORS: {
-				Tree copy = tree.deepCopy();
-				setIndexLabels(copy, 0);
-				out.println(copy);
-				outputTreeVectors(out, tree, 0);
-				break;
-			}
-			case ROOT: {
-				out.println("  " + sentence.get(SentimentCoreAnnotations.SentimentClass.class));
-				break;
-			}
-			case PROBABILITIES: {
-				Tree copy = tree.deepCopy();
-				setIndexLabels(copy, 0);
-				out.println(copy);
-				outputTreeScores(out, tree, 0);
-				break;
-			}
-			default:
-				throw new IllegalArgumentException("Unknown output format " + output);
-			}
-		}
-	}
-
-	/**
-	 * Reads an annotation from the given filename using the requested input.
-	 *
-	 * @param tokenizer pipeline used to tokenize and split {@code TEXT} input;
-	 *                  not used for {@code TREES} input
-	 * @param inputFormat whether the file holds plain text or trees
-	 * @param filename the file to read
-	 * @param filterUnknown for {@code TREES} input, read the trees with gold
-	 *                      labels and filter out those with unknown roots
-	 * @return one annotation per sentence (or per tree)
-	 */
-	public static List<Annotation> getAnnotations(StanfordCoreNLP tokenizer, Input inputFormat, String filename, boolean filterUnknown) {
-		switch (inputFormat) {
-		case TEXT: {
-			String text = IOUtils.slurpFileNoExceptions(filename);
-			Annotation annotation = new Annotation(text);
-			tokenizer.annotate(annotation);
-			List<Annotation> annotations = Generics.newArrayList();
-			for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
-				Annotation nextAnnotation = new Annotation(sentence.get(CoreAnnotations.TextAnnotation.class));
-				nextAnnotation.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
-				annotations.add(nextAnnotation);
-			}
-			return annotations;
-		}
-		case TREES: {
-			List<Tree> trees;
-			if (filterUnknown) {
-				trees = SentimentUtils.readTreesWithGoldLabels(filename);
-				trees = SentimentUtils.filterUnknownRoots(trees);
-			} else {
-				trees = Generics.newArrayList();
-				MemoryTreebank treebank = new MemoryTreebank("utf-8");
-				treebank.loadPath(filename, null);
-				for (Tree tree : treebank) {
-					trees.add(tree);
-				}
-			}
-
-			List<Annotation> annotations = Generics.newArrayList();
-			for (Tree tree : trees) {
-				CoreMap sentence = new Annotation(listToString(tree.yield()));
-				sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
-				List<CoreMap> sentences = Collections.singletonList(sentence);
-				Annotation annotation = new Annotation("");
-				annotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
-				annotations.add(annotation);
-			}
-			return annotations;
-		}
-		default:
-			throw new IllegalArgumentException("Unknown format " + inputFormat);
-		}
-	}
-
-	/**
-	 * Returns the value that follows the option at {@code index}.
-	 *
-	 * @throws IllegalArgumentException if the option is the last argument
-	 */
-	private static String optionValue(String[] args, int index) {
-		if (index + 1 >= args.length) {
-			throw new IllegalArgumentException("Missing value for argument " + args[index]);
-		}
-		return args[index + 1];
-	}
-
-	/**
-	 * Runs the tree-based sentiment model on some text.
-	 *
-	 * Recognized arguments (case-insensitive): {@code -sentimentModel},
-	 * {@code -parserModel}, {@code -file}, {@code -fileList},
-	 * {@code -input}, {@code -output} and {@code -tlppClass}, each followed
-	 * by a value; and the flags {@code -stdin}, {@code -filterUnknown} and
-	 * {@code -help}. Exactly one of {@code -file}, {@code -fileList} and
-	 * {@code -stdin} must be given. {@code -help} terminates the JVM.
-	 *
-	 * @throws IllegalArgumentException on an unknown argument, a missing
-	 *         option value, or an invalid combination of input sources
-	 * @throws IOException if reading input or writing output fails
-	 */
-	public void processTextWithArgs(String[] args) throws IOException {
-		String parserModel = null;
-		String sentimentModel = null;
-
-		String filename = null;
-		String fileList = null;
-		boolean stdin = false;
-
-		boolean filterUnknown = false;
-
-		List<Output> outputFormats = Collections.singletonList(Output.ROOT);
-		Input inputFormat = Input.TEXT;
-
-		String tlppClass = "DEFAULT_TLPP_CLASS";
-
-		for (int argIndex = 0; argIndex < args.length; ) {
-			String arg = args[argIndex];
-			if (arg.equalsIgnoreCase("-sentimentModel")) {
-				sentimentModel = optionValue(args, argIndex);
-				argIndex += 2;
-			} else if (arg.equalsIgnoreCase("-parserModel")) {
-				parserModel = optionValue(args, argIndex);
-				argIndex += 2;
-			} else if (arg.equalsIgnoreCase("-file")) {
-				filename = optionValue(args, argIndex);
-				argIndex += 2;
-			} else if (arg.equalsIgnoreCase("-fileList")) {
-				fileList = optionValue(args, argIndex);
-				argIndex += 2;
-			} else if (arg.equalsIgnoreCase("-stdin")) {
-				stdin = true;
-				argIndex++;
-			} else if (arg.equalsIgnoreCase("-input")) {
-				inputFormat = Input.valueOf(optionValue(args, argIndex).toUpperCase());
-				argIndex += 2;
-			} else if (arg.equalsIgnoreCase("-output")) {
-				String[] formats = optionValue(args, argIndex).split(",");
-				outputFormats = new ArrayList<>();
-				for (String format : formats) {
-					outputFormats.add(Output.valueOf(format.toUpperCase()));
-				}
-				argIndex += 2;
-			} else if (arg.equalsIgnoreCase("-filterUnknown")) {
-				filterUnknown = true;
-				argIndex++;
-			} else if (arg.equalsIgnoreCase("-tlppClass")) {
-				tlppClass = optionValue(args, argIndex);
-				argIndex += 2;
-			} else if (arg.equalsIgnoreCase("-help")) {
-				System.exit(0);
-			} else {
-				// Report the offending argument itself, not the one after it.
-				log.info("Unknown argument " + arg);
-				throw new IllegalArgumentException("Unknown argument " + arg);
-			}
-		}
-
-		// We construct two pipelines.  One handles tokenization, if
-		// necessary.  The other takes tokenized sentences and converts
-		// them to sentiment trees.
-		Properties pipelineProps = new Properties();
-		Properties tokenizerProps = null;
-		if (sentimentModel != null) {
-			pipelineProps.setProperty("sentiment.model", sentimentModel);
-		}
-		if (parserModel != null) {
-			pipelineProps.setProperty("parse.model", parserModel);
-		}
-		if (inputFormat == Input.TREES) {
-			pipelineProps.setProperty("annotators", "binarizer, sentiment");
-			pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
-			pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
-			pipelineProps.setProperty("enforceRequirements", "false");
-		} else {
-			pipelineProps.setProperty("annotators", "parse, sentiment");
-			pipelineProps.setProperty("enforceRequirements", "false");
-			tokenizerProps = new Properties();
-			tokenizerProps.setProperty("annotators", "tokenize, ssplit");
-		}
-
-		if (stdin && tokenizerProps != null) {
-			tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
-		}
-
-		int count = 0;
-		if (filename != null) count++;
-		if (fileList != null) count++;
-		if (stdin) count++;
-		if (count > 1) {
-			throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
-		}
-		if (count == 0) {
-			throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
-		}
-		if (stdin && tokenizerProps == null) {
-			// Lines read from stdin are tokenized; no tokenizer exists for tree input.
-			throw new IllegalArgumentException("-stdin cannot be combined with -input TREES");
-		}
-
-		StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
-		StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);
-
-		if (filename != null) {
-			// Process a file.  The pipeline will do tokenization, which
-			// means it will split it into sentences as best as possible
-			// with the tokenizer.
-			List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
-			for (Annotation annotation : annotations) {
-				pipeline.annotate(annotation);
-
-				for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
-					System.out.println(sentence);
-					outputTree(System.out, sentence, outputFormats);
-				}
-			}
-		} else if (fileList != null) {
-			// Process multiple files.  The pipeline will do tokenization,
-			// which means it will split it into sentences as best as
-			// possible with the tokenizer.  Output will go to filename.out
-			// for each file.
-			for (String file : fileList.split(",")) {
-				List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
-				// try-with-resources closes the output file even if annotation fails.
-				try (PrintStream pout = new PrintStream(new FileOutputStream(file + ".out"))) {
-					for (Annotation annotation : annotations) {
-						pipeline.annotate(annotation);
-
-						for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
-							pout.println(sentence);
-							outputTree(pout, sentence, outputFormats);
-						}
-					}
-				}
-			}
-		} else {
-			// Process stdin.  Each line will be treated as a single sentence.
-			log.info("Reading in text from stdin.");
-			log.info("Please enter one sentence per line.");
-			log.info("Processing will end when EOF is reached.");
-			BufferedReader reader = IOUtils.readerFromStdin("utf-8");
-
-			for (String line; (line = reader.readLine()) != null; ) {
-				line = line.trim();
-				if ( ! line.isEmpty()) {
-					Annotation annotation = tokenizer.process(line);
-					pipeline.annotate(annotation);
-					for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
-						outputTree(System.out, sentence, outputFormats);
-					}
-				} else {
-					// Output blank lines for blank lines so the tool can be
-					// used for line-by-line text processing
-					System.out.println();
-				}
-			}
-
-		}
-	}
-
-	/**
-	 * Returns the predicted sentiment class of the longest sentence of
-	 * {@code expression}, or 0 when the expression is {@code null} or empty.
-	 * A new pipeline is constructed on every call.
-	 *
-	 * @param expression the text to score
-	 * @return the predicted class of the longest sentence, as a float
-	 */
-	public float getNumericSentimentValue(String expression) {
-		Properties props = new Properties();
-		props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
-		StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
-		int mainSentiment = 0;
-		if (expression != null && expression.length() > 0) {
-			int longest = 0;
-			Annotation annotation = pipeline.process(expression);
-			for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
-				Tree tree = sentence.get(SentimentAnnotatedTree.class);
-				int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
-				String partText = sentence.toString();
-				if (partText.length() > longest) {
-					mainSentiment = sentiment;
-					longest = partText.length();
-				}
-			}
-		}
-		return mainSentiment;
-	}
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/EntityExtractionResult.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/EntityExtractionResult.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/EntityExtractionResult.java
deleted file mode 100644
index 69eae1d..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/EntityExtractionResult.java
+++ /dev/null
@@ -1,158 +0,0 @@
-package opennlp.tools.parse_thicket.opinion_processor;
-
-import java.util.List;
-
-import opennlp.tools.parse_thicket.ParseTreeNode;
-import opennlp.tools.similarity.apps.HitBase;
-
-public class EntityExtractionResult {
-	List<List<ParseTreeNode>> extractedNERs;
-	public List<String> extractedNERWords;
-	// phrases w/sentiments
-	public List<List<ParseTreeNode>> extractedSentimentPhrases;
-	public List<String> extractedSentimentPhrasesStr;
-	// phrases w/o sentiments
-	public List<List<ParseTreeNode>> extractedNONSentimentPhrases;
-	public List<String> extractedNONSentimentPhrasesStr;
-	public List<Float> sentimentProfile;
-	
-	
-	public List<String> getExtractedSentimentPhrasesStr() {
-		return extractedSentimentPhrasesStr;
-	}
-
-	public void setExtractedSentimentPhrasesStr(List<String> extractedSentimentPhrasesStr) {
-		this.extractedSentimentPhrasesStr = extractedSentimentPhrasesStr;
-	}
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License. You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-	public List<List<ParseTreeNode>> getExtractedNONSentimentPhrases() {
-		return extractedNONSentimentPhrases;
-	}
-
-	public void setExtractedNONSentimentPhrases(List<List<ParseTreeNode>> extractedNONSentimentPhrases) {
-		this.extractedNONSentimentPhrases = extractedNONSentimentPhrases;
-	}
-
-	public List<String> getExtractedNONSentimentPhrasesStr() {
-		return extractedNONSentimentPhrasesStr;
-	}
-
-	public void setExtractedNONSentimentPhrasesStr(List<String> extractedNONSentimentPhrasesStr) {
-		this.extractedNONSentimentPhrasesStr = extractedNONSentimentPhrasesStr;
-	}
-
-	public List<HitBase> hits;
-	private List<List<ParseTreeNode>> extractedNerPhrases;
-	private List<String> extractedNerPhrasesStr;
-	private List<String> extractedNerPhraseTags;
-	private List<List<ParseTreeNode>> extractedNerExactPhrases;
-	private List<String> extractedNerExactStr;
-
-	public void setExtractedNERWords(List<String> extractedNERWords) {
-		this.extractedNERWords = extractedNERWords;
-	}
-
-	public void setExtractedSentimentPhrases(List<List<ParseTreeNode>> extractedSentimentPhrases) {
-		this.extractedSentimentPhrases = extractedSentimentPhrases;
-	}
-
-	public void setExtractedNER(List<List<ParseTreeNode>> extractedNERs) {
-		this.extractedNERs = extractedNERs;
-	}
-
-	public void setGossipHits(List<HitBase> hitsForAnEntity) {
-		hits = hitsForAnEntity;
-	}
-
-	public List<List<ParseTreeNode>> getExtractedNERs() {
-		return extractedNERs;
-	}
-
-	public void setExtractedNERs(List<List<ParseTreeNode>> extractedNERs) {
-		this.extractedNERs = extractedNERs;
-	}
-
-	public List<HitBase> getHits() {
-		return hits;
-	}
-
-	public void setHits(List<HitBase> hits) {
-		this.hits = hits;
-	}
-
-	public List<String> getExtractedNERWords() {
-		return extractedNERWords;
-	}
-
-	public List<List<ParseTreeNode>> getExtractedSentimentPhrases() {
-		return extractedSentimentPhrases;
-	}
-
-	public void setSentimentProfile(List<Float> sentimentProfile) {
-	    this.sentimentProfile = sentimentProfile;
-    }
-
-	public List<Float> getSentimentProfile() {
-		return sentimentProfile;
-	}
-
-	public void setExtractedNerPhrases(List<List<ParseTreeNode>> extractedNerPhrases) {
-	    this.extractedNerPhrases = extractedNerPhrases;
-	    
-    }
-
-	public void setExtractedNerPhrasesStr(List<String> extractedNerPhrasesStr) {
-	    this.extractedNerPhrasesStr = extractedNerPhrasesStr;
-	    
-    }
-
-	public List<List<ParseTreeNode>> getExtractedNerPhrases() {
-		return extractedNerPhrases;
-	}
-
-	public List<String> getExtractedNerPhrasesStr() {
-		return extractedNerPhrasesStr;
-	}
-
-	public void setExtractedNerPhraseTags(List<String> extractedNerPhraseTags) {
-	    this.extractedNerPhraseTags = extractedNerPhraseTags;	    
-    }
-
-	public List<String> getExtractedNerPhraseTags() {
-	    return this.extractedNerPhraseTags;    
-    }
-
-	public void setExtractedNerExactPhrases(List<List<ParseTreeNode>> extractedNerExactPhrases) {
-	   this.extractedNerExactPhrases = extractedNerExactPhrases;
-	    
-    }
-
-	public void setExtractedNerExactStr(List<String> extractedNerExactStr) {
-	    this.extractedNerExactStr = extractedNerExactStr;
-	    
-    }
-
-	public List<List<ParseTreeNode>> getExtractedNerExactPhrases() {
-		return extractedNerExactPhrases;
-	}
-
-	public List<String> getExtractedNerExactStr() {
-		return extractedNerExactStr;
-	}
-	
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/2707f665/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/ExpressionSentimentAnalyzer.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/ExpressionSentimentAnalyzer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/ExpressionSentimentAnalyzer.java
deleted file mode 100644
index dc89d8b..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/ExpressionSentimentAnalyzer.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.parse_thicket.opinion_processor;
-
-import java.util.Properties;
-
-import edu.stanford.nlp.ling.CoreAnnotations;
-import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
-import edu.stanford.nlp.pipeline.Annotation;
-import edu.stanford.nlp.pipeline.StanfordCoreNLP;
-import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
-import edu.stanford.nlp.sentiment.SentimentCoreAnnotations.SentimentAnnotatedTree;
-import edu.stanford.nlp.trees.Tree;
-import edu.stanford.nlp.util.CoreMap;
-
-public class ExpressionSentimentAnalyzer {
-	float findSentiment(String line) {
-		Properties props = new Properties();
-		props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
-		StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
-		int mainSentiment = 0;
-		if (line != null && line.length() > 0) {
-			int longest = 0;
-			Annotation annotation = pipeline.process(line);
-			for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
-				Tree tree = sentence.get(SentimentAnnotatedTree.class);
-				int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
-				String partText = sentence.toString();
-				if (partText.length() > longest) {
-					mainSentiment = sentiment;
-					longest = partText.length();
-				}
-			}
-		}
-		return mainSentiment;
-	}
-
-	public static void main(String[] args) {
-		float sent = new ExpressionSentimentAnalyzer().findSentiment("poor president nomee Hilary Clinton visited Mexico");
-		System.out.println(sent);
-	}
-}
\ No newline at end of file


Mime
View raw message