ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1514740 - in /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference: ae/GoldCoreferenceReader.java ae/NamedEntityCoreferenceResolver.java eval/EvaluationOfCoreferencePairs.java util/SpanAlignment.java
Date Fri, 16 Aug 2013 14:48:42 GMT
Author: tmill
Date: Fri Aug 16 14:48:42 2013
New Revision: 1514740

URL: http://svn.apache.org/r1514740
Log:
Partially working version of pair evaluation and classifier. Not optimized for performance.

Added:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/util/SpanAlignment.java
Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/GoldCoreferenceReader.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/GoldCoreferenceReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/GoldCoreferenceReader.java?rev=1514740&r1=1514739&r2=1514740&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/GoldCoreferenceReader.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/GoldCoreferenceReader.java
Fri Aug 16 14:48:42 2013
@@ -42,7 +42,7 @@ public class GoldCoreferenceReader exten
 	public void process(JCas jcas) throws AnalysisEngineProcessException {
 		HashMap<String, Integer> goldSpan2id = new HashMap<String, Integer>();
 		ArrayList<Span> goldSpans = new ArrayList<Span>();
-		HashMap<String[], IdentifiedAnnotation> mentions = new HashMap<String[], IdentifiedAnnotation>();
+		HashMap<String, IdentifiedAnnotation> mentions = new HashMap<String, IdentifiedAnnotation>();
 		
 		String docId = DocumentIDAnnotationUtil.getDocumentID(jcas);
 		File f = new File(goldDir + File.separator + docId);
@@ -64,9 +64,13 @@ public class GoldCoreferenceReader exten
 					anteMention = new IdentifiedAnnotation(jcas);
 	        anteMention.setBegin(a[0]);
 	        anteMention.setEnd(a[a.length-1]);					
-					mentions.put(spanPair, anteMention);
+					mentions.put(spanPair[0], anteMention);
 				}else{
-				  anteMention = mentions.get(spanPair);
+				  anteMention = mentions.get(spanPair[0]);
+				  if(anteMention == null){
+				    br.close();
+				    throw new AnalysisEngineProcessException("Antecedent is still null!", new Object[]{});
+				  }
 				}
 				IdentifiedAnnotation anaMention = null; //new IdentifiedAnnotation(jcas);
 				if (!goldSpan2id.containsKey(spanPair[1])){
@@ -79,9 +83,13 @@ public class GoldCoreferenceReader exten
 					anaMention = new IdentifiedAnnotation(jcas);
 					anaMention.setBegin(a[0]);
 					anaMention.setEnd(a[a.length-1]);
-					mentions.put(spanPair, anaMention);
+					mentions.put(spanPair[1], anaMention);
 				}else{
-				  anaMention = mentions.get(spanPair);
+				  anaMention = mentions.get(spanPair[1]);
+				  if(anaMention == null){
+            br.close();
+            throw new AnalysisEngineProcessException("Anaphor is still null!", new Object[]{});
			    
+				  }
 				}
 				
 				RelationArgument arg1 = new RelationArgument(jcas);

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java?rev=1514740&r1=1514739&r2=1514740&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/NamedEntityCoreferenceResolver.java
Fri Aug 16 14:48:42 2013
@@ -3,21 +3,34 @@ package org.apache.ctakes.coreference.ae
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.coreference.ae.features.DistanceFeatureExtractor;
 import org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor;
 import org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor;
 import org.apache.ctakes.coreference.ae.features.UMLSFeatureExtractor;
 import org.apache.ctakes.coreference.util.CorefConst;
+import org.apache.ctakes.coreference.util.Span;
+import org.apache.ctakes.coreference.util.SpanAlignment;
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.log4j.Logger;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.jcas.tcas.DocumentAnnotation;
@@ -40,6 +53,8 @@ public class NamedEntityCoreferenceResol
         NamedEntityCoreferenceResolver.class,
         CleartkAnnotator.PARAM_IS_TRAINING,
         true,
+        RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+        0.1f,
         DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
         dataWriterClass,
         DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
@@ -56,6 +71,16 @@ public class NamedEntityCoreferenceResol
         new File(modelDirectory, "model.jar"));
   }
 
+  private HashMap<IdentifiedAnnotation, IdentifiedAnnotation> sys2gold = new HashMap<IdentifiedAnnotation, IdentifiedAnnotation>();
+  
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
+    System.err.println("docId = " + docId);
+//    initializeEntityMapping(jCas);
+    super.process(jCas);
+  }
+  
 	@Override
 	protected List<RelationFeaturesExtractor> getFeatureExtractors() {
 		List<RelationFeaturesExtractor> extractors = new ArrayList<RelationFeaturesExtractor>();
@@ -117,5 +142,46 @@ public class NamedEntityCoreferenceResol
 	protected Class<? extends Annotation> getCoveringClass() {
 		return DocumentAnnotation.class;
 	}
+	
+	@Override
+	protected Class<? extends BinaryTextRelation> getRelationClass() {
+	  return CoreferenceRelation.class;
+	}
 
+	@Override
+	protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+	    IdentifiedAnnotation arg2, String predictedCategory) {
+    // add the relation to the CAS
+    RelationArgument relArg1 = new RelationArgument(jCas);
+    relArg1.setArgument(arg1);
+    relArg1.setRole("Antecedent");
+    relArg1.addToIndexes();
+    RelationArgument relArg2 = new RelationArgument(jCas);
+    relArg2.setArgument(arg2);
+    relArg2.setRole("Anaphor");
+    relArg2.addToIndexes();
+    CoreferenceRelation relation = new CoreferenceRelation(jCas);
+    relation.setArg1(relArg1);
+    relation.setArg2(relArg2);
+    relation.setCategory(predictedCategory);
+    relation.addToIndexes();
+	}
+	
+  /**
+   * Looks up the arguments in the specified lookup table and converts the
+   * relation into a label for classification
+   * 
+   * @return First map the system arguments it receives onto the gold arguments
+   * from the training data then return whatever the parent says about the relation
+   * between those gold entities
+   */
+//	@Override
+//	protected String getRelationCategory(
+//	    Map<List<Annotation>, BinaryTextRelation> relationLookup,
+//	    IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+//	  IdentifiedAnnotation arg1gold = sys2gold.get(arg1);
+//	  IdentifiedAnnotation arg2gold = sys2gold.get(arg2);
+//	  return super.getRelationCategory(relationLookup, arg1gold, arg2gold);
+//	}
+	
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java?rev=1514740&r1=1514739&r2=1514740&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java
Fri Aug 16 14:48:42 2013
@@ -2,18 +2,26 @@ package org.apache.ctakes.coreference.ev
 
 import java.io.File;
 import java.net.URI;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.ctakes.coreference.ae.NamedEntityCoreferenceResolver;
+import org.apache.ctakes.coreference.util.Span;
+import org.apache.ctakes.coreference.util.SpanAlignment;
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
 import org.apache.ctakes.relationextractor.eval.XMIReader;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -22,6 +30,7 @@ import org.apache.uima.analysis_engine.A
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.FeatureStructure;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.TOP;
@@ -64,7 +73,7 @@ public class EvaluationOfCoreferencePair
         aliases = "--train", 
         usage = "specify the directory contraining the xmis for the training partition",
         required = true)
-    public File traingDirectory;
+    public File trainingDirectory;
     
     @Option(name = "--print-errors", required = false)
     public boolean printErrors=false;
@@ -108,8 +117,8 @@ public class EvaluationOfCoreferencePair
 			throws Exception {
 	  AggregateBuilder aggregateBuilder = new AggregateBuilder();
  
-//	  aggregateBuilder.add(RemoveSystemMarkables.class);
-    aggregateBuilder.add(CopyFromGold.getDescription(CoreferenceRelation.class));
+	  aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CreateSystemRelations.class));
+    aggregateBuilder.add(CopyFromGold.getDescription(DocumentID.class));
 	  aggregateBuilder.add(
 	      NamedEntityCoreferenceResolver.createDataWriterDescription(
 	          LIBSVMStringOutcomeDataWriter.class,
@@ -132,7 +141,8 @@ public class EvaluationOfCoreferencePair
 			File directory) throws Exception {
     AggregateBuilder aggregateBuilder = new AggregateBuilder();
     
-    aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, EntityMention.class));
+    aggregateBuilder.add(CopyFromGold.getDescription(DocumentID.class));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(MapGoldRelations.class));
     aggregateBuilder.add(
         NamedEntityCoreferenceResolver.createAnnotatorDescription(directory)
             );
@@ -240,6 +250,53 @@ public class EvaluationOfCoreferencePair
     return getXMIFile(xmiDirectory, new File(ViewURIUtil.getURI(jCas).getPath()));
   }
 
+  static HashMap<IdentifiedAnnotation,IdentifiedAnnotation> initializeEntityMapping(JCas systemView, JCas goldView){
+    HashMap<IdentifiedAnnotation,IdentifiedAnnotation> gold2sys = new HashMap<IdentifiedAnnotation, IdentifiedAnnotation>();
+    // create hashmap of system to gold entities (don't bother at test time b/c no gold entities)
+    ArrayList<IdentifiedAnnotation> sysMarkables = new ArrayList<IdentifiedAnnotation>(JCasUtil.select(systemView, EntityMention.class));
+    sysMarkables.addAll(JCasUtil.select(systemView, EventMention.class));
+    Collections.sort(sysMarkables, new AnnotationComparator());
+    Span[] sysSpans = new Span[sysMarkables.size()];
+    int i = 0;
+    for(IdentifiedAnnotation sysMarkable : sysMarkables){
+      sysSpans[i] = new Span(new int[]{sysMarkable.getBegin(), sysMarkable.getEnd()});
+      i++;
+    }
+
+    Collection<CoreferenceRelation> goldRels = JCasUtil.select(goldView, CoreferenceRelation.class);
+    Set<IdentifiedAnnotation> goldEntities = new HashSet<IdentifiedAnnotation>();
+    for(CoreferenceRelation rel : goldRels){
+      goldEntities.add((IdentifiedAnnotation)rel.getArg1().getArgument());
+      goldEntities.add((IdentifiedAnnotation)rel.getArg2().getArgument());
+    }
+    List<IdentifiedAnnotation> goldMarkables = new ArrayList<IdentifiedAnnotation>(goldEntities);
+    Collections.sort(goldMarkables, new AnnotationComparator());
+
+    Span[] goldSpans = new Span[goldMarkables.size()];
+    i = 0;
+    for(Annotation goldMarkable : goldMarkables){
+      goldSpans[i] = new Span(new int[]{goldMarkable.getBegin(), goldMarkable.getEnd()});
+      i++;
+    }
+    SpanAlignment align = new SpanAlignment(goldSpans, sysSpans);
+    int[] goldAlign = align.get1();
+    int[] sysAlign = align.get2();
+
+    int j = 0; i = 0;
+    while(i < goldAlign.length && j < sysAlign.length){
+      if(goldAlign[i] == sysAlign[j]){
+        // we have a mapped entity:
+          gold2sys.put(goldMarkables.get(i), sysMarkables.get(j));
+          i++; j++;
+      }else if(goldAlign[i] < sysAlign[j]){
+        i++;
+      }else if(sysAlign[j] < goldAlign[i]){
+        j++;
+      }
+    }
+    return gold2sys;
+  }
+
   public static class CopyFromGold extends JCasAnnotator_ImplBase {
 
     public static AnalysisEngineDescription getDescription(Class<?>... classes)
@@ -285,6 +342,94 @@ public class EvaluationOfCoreferencePair
     }
   }
 
+  public static class CreateSystemRelations extends JCasAnnotator_ImplBase{
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      JCas systemView, goldView;
+      try {
+        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+        goldView = jCas.getView(GOLD_VIEW_NAME);
+        HashMap<IdentifiedAnnotation,IdentifiedAnnotation> gold2sys = initializeEntityMapping(systemView, goldView);
+        Collection<CoreferenceRelation> goldRels = JCasUtil.select(goldView, CoreferenceRelation.class);
+        for(CoreferenceRelation goldRel : goldRels){
+          IdentifiedAnnotation goldArg1 = (IdentifiedAnnotation) goldRel.getArg1().getArgument();
+          IdentifiedAnnotation goldArg2 = (IdentifiedAnnotation) goldRel.getArg2().getArgument();
+          IdentifiedAnnotation sysArg1 = gold2sys.get(goldArg1);
+          IdentifiedAnnotation sysArg2 = gold2sys.get(goldArg2);
+//          sysArg1.addToIndexes(goldView);
+//          sysArg2.addToIndexes(goldView);
+          RelationArgument sysRelArg1 = new RelationArgument(systemView);
+          sysRelArg1.setArgument(sysArg1);
+          RelationArgument sysRelArg2 = new RelationArgument(systemView);
+          sysRelArg2.setArgument(sysArg2);
+          CoreferenceRelation sysRel = new CoreferenceRelation(systemView);
+          sysRel.setArg1(sysRelArg1);
+          sysRel.setArg2(sysRelArg2);
+          sysRel.setCategory(goldRel.getCategory());
+          sysRel.addToIndexes();
+          
+//          goldRel.setArg1(sysRelArg1);
+//          goldRel.setArg2(sysRelArg2);
+//          goldRel.addToIndexes();
+//          toRemove.add(goldRel.getArg1());
+//          toRemove.add(goldRel.getArg2());
+//          toRemove.add(goldRel);
+        }
+//        for(TOP rm : toRemove){
+//          rm.removeFromIndexes();
+//        }
+      } catch (CASException e) {
+        e.printStackTrace();
+        throw new AnalysisEngineProcessException(e);
+      }
+    }
+  }
+  
+  public static class MapGoldRelations extends JCasAnnotator_ImplBase {
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      JCas systemView, goldView;
+      try {
+        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+        goldView = jCas.getView(GOLD_VIEW_NAME);
+        HashMap<IdentifiedAnnotation,IdentifiedAnnotation> gold2sys = initializeEntityMapping(systemView, goldView);
+        Collection<CoreferenceRelation> goldRels = JCasUtil.select(goldView, CoreferenceRelation.class);
+        List<Annotation> toRemove = new ArrayList<Annotation>();
+        for(CoreferenceRelation goldRel : goldRels){
+          IdentifiedAnnotation goldArg1 = (IdentifiedAnnotation) goldRel.getArg1().getArgument();
+          IdentifiedAnnotation goldArg2 = (IdentifiedAnnotation) goldRel.getArg2().getArgument();
+          IdentifiedAnnotation sysArg1 = gold2sys.get(goldArg1);
+          IdentifiedAnnotation sysArg2 = gold2sys.get(goldArg2);
+          if(sysArg1 != null){
+            RelationArgument sysRelArg1 = new RelationArgument(goldView);
+            sysRelArg1.setArgument(sysArg1);
+            goldRel.setArg1(sysRelArg1);
+          }
+          if(sysArg2 != null){
+            RelationArgument sysRelArg2 = new RelationArgument(goldView);
+            sysRelArg2.setArgument(sysArg2);
+            goldRel.setArg2(sysRelArg2);
+          }
+        }
+      }catch (CASException e) {
+        e.printStackTrace();
+        throw new AnalysisEngineProcessException(e);
+      }
+    }
+    
+  }
+  
+  static class AnnotationComparator implements Comparator<Annotation>{
+
+    @Override
+    public int compare(Annotation arg0, Annotation arg1) {
+      return arg0.getBegin() - arg1.getBegin();
+    }
+    
+  }
+
 	/**
 	 * @param args
 	 * @throws Exception 
@@ -292,7 +437,7 @@ public class EvaluationOfCoreferencePair
 	public static void main(String[] args) throws Exception {
     Options options = new Options();
     options.parseOptions(args);
-	  List<File> trainItems = getFiles(options.traingDirectory);
+	  List<File> trainItems = getFiles(options.trainingDirectory);
 	  List<File> testItems = getFiles(options.testDirectory);
 	  
 	  EvaluationOfCoreferencePairs eval = new EvaluationOfCoreferencePairs(new File("target/models/"), options.printErrors, options.printRelations);

Added: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/util/SpanAlignment.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/util/SpanAlignment.java?rev=1514740&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/util/SpanAlignment.java
(added)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/util/SpanAlignment.java
Fri Aug 16 14:48:42 2013
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.coreference.util;
+
+public class SpanAlignment {
+
+	int[] id1;
+	int[] id2;
+
+	public SpanAlignment (Span[] s1, Span[] s2) {
+		double[][] scores;
+		int[][] track; // 0 = left, 1 = upper left, 2 = up
+		int l1 = s1.length;
+		int l2 = s2.length;
+		scores = new double[l1+1][l2+1];
+		track = new int[l1+1][l2+1];
+		id1 = new int[l1];
+		id2 = new int[l2];
+		scores[0][0] = 0.0;
+		track[0][0] = -1;
+
+		for (int i = 1; i <= l1; i++) {
+			scores[i][0] = s1[i-1].gap() * i;
+			track[i][0] = 2;
+		}
+		for (int i = 1; i <= l2; i++) {
+			scores[0][i] = s2[i-1].gap() * i;
+			track[0][i] = 0;
+		}
+
+		for (int i = 1; i <= l1; i++)
+			for (int j = 1; j <=l2; j++) {
+				double match = scores[i-1][j-1] + Span.score(s1[i-1], s2[j-1]);
+				double gap1 = scores[i][j-1] + s1[i-1].gap();
+				double gap2 = scores[i-1][j] + s2[j-1].gap();
+				if (match>=gap1 && match>=gap2) {
+					scores[i][j] = match;
+					track[i][j] = 1;
+				}
+				else if (gap1>=match && gap1>=gap2) {
+					scores[i][j] = gap1;
+					track[i][j] = 0;
+				}
+				else {
+					scores[i][j] = gap2;
+					track[i][j] = 2;
+				}
+			}
+
+		int i = l1;
+		int j = l2;
+		int id = 0;
+//		StringBuffer sb1 = new StringBuffer();
+//		StringBuffer sb2 = new StringBuffer();
+		while (i>0 || j>0) {
+			int dir = track[i][j];
+			switch (dir) {
+			case 0: id2[--j] = id++; break;//sb1.insert(0, "_"); sb2.insert(0,s2[j]); break;
+			case 1: id1[--i] = id; id2[--j] = id++; break;//sb1.insert(0,s1[i]); sb2.insert(0,s2[j]); break;
+			case 2: id1[--i] = id++; //sb1.insert(0,s1[i]); sb2.insert(0,"_");
+			}
+		}
+		for (int k = 0; k < l1; k++)
+			id1[k] = id - id1[k];
+		for (int k = 0; k < l2; k++)
+			id2[k] = id - id2[k];
+
+//		for (int m = 0; m < scores.length; m++) {
+//			for (int n = 0; n < scores[0].length; n++)
+//				System.out.print(scores[m][n]+" ");
+//			System.out.println();
+//		}
+//		for (int m = 0; m < track.length; m++) {
+//			for (int n = 0; n < track[0].length; n++)
+//				System.out.print(track[m][n]==0?"-":(track[m][n]==1?"\\":"|"));
+//			System.out.println();
+//		}
+	}
+
+	public int[] get1 () { return id1; }
+	public int get1 (int i) { return id1[i]; }
+	public int[] get2 () { return id2; }
+	public int get2 (int i) { return id2[i]; }
+	public int getMaxID () {
+		int mid1 = id1.length>0 ? id1[id1.length-1] : 0;
+		int mid2 = id2.length>0 ? id2[id2.length-1] : 0;
+		return mid1>mid2 ? mid1 : mid2;
+	}
+}



Mime
View raw message