uima-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sc...@apache.org
Subject svn commit: r991931 [1/2] - in /uima/uimaj/trunk/uimaj-core/src: main/java/org/apache/uima/analysis_engine/impl/ main/java/org/apache/uima/analysis_engine/impl/compatibility/ main/java/org/apache/uima/cas/impl/ test/java/org/apache/uima/analysis_engine...
Date Thu, 02 Sep 2010 13:43:51 GMT
Author: schor
Date: Thu Sep  2 13:43:50 2010
New Revision: 991931

URL: http://svn.apache.org/viewvc?rev=991931&view=rev
Log:
[UIMA-1860] [UIMA-1840] Re-impl of result spec, supporting correct semantics in detail. Test case updates to test additional corner cases.

Added:
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsTypesMap.java
Modified:
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/TypeOrFeature_impl.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/compatibility/AnnotatorAdapter.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java
    uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/analysis_engine/impl/AnalysisEngine_implTest.java
    uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/analysis_engine/impl/ResultSpecTest.java
    uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/analysis_engine/impl/ResultSpecWithTypeSystemTest.java

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java?rev=991931&r1=991930&r2=991931&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/PrimitiveAnalysisEngine_impl.java Thu Sep  2 13:43:50 2010
@@ -359,14 +359,14 @@ public class PrimitiveAnalysisEngine_imp
           if (mLastTypeSystem != view.getTypeSystem()) {
             mLastTypeSystem = view.getTypeSystem();
             mCurrentResultSpecification.setTypeSystem(mLastTypeSystem);
-            rsFromOutputCapabilities = new ResultSpecification_impl();
+            rsFromOutputCapabilities = new ResultSpecification_impl(mLastTypeSystem);
             rsFromOutputCapabilities.addCapabilities(this.getAnalysisEngineMetaData().getCapabilities());
           }
           // the actual ResultSpec we send to the component is formed by
-          // looking at this primitive AE's declared output types and eliminiating
+          // looking at this primitive AE's declared output types and eliminating
           // any that are not in mCurrentResultSpecification.
           ResultSpecification analysisComponentResultSpec = 
-            ResultSpecification_impl.intersect(mCurrentResultSpecification, (ResultSpecification_impl) rsFromOutputCapabilities);
+            ((ResultSpecification_impl)mCurrentResultSpecification).intersect((ResultSpecification_impl)rsFromOutputCapabilities);
           mAnalysisComponent.setResultSpecification(analysisComponentResultSpec);
           mResultSpecChanged = false;
         }

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java?rev=991931&r1=991930&r2=991931&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/ResultSpecification_impl.java Thu Sep  2 13:43:50 2010
@@ -20,15 +20,18 @@
 package org.apache.uima.analysis_engine.impl;
 
 import java.util.ArrayList;
-import java.util.BitSet;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.uima.analysis_engine.ResultSpecification;
 import org.apache.uima.analysis_engine.TypeOrFeature;
+import org.apache.uima.cas.Feature;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.TypeSystemImpl;
 import org.apache.uima.cas.text.Language;
 import org.apache.uima.resource.metadata.Capability;
 import org.apache.uima.resource.metadata.impl.MetaDataObject_impl;
@@ -36,37 +39,88 @@ import org.apache.uima.resource.metadata
 import org.apache.uima.resource.metadata.impl.XmlizationInfo;
 
 /**
- * Reference implementaion of {@link ResultSpecification}.
+ * Reference implementation of {@link ResultSpecification}.
  * 
  * Notes on the implementation
  * 
- * There are two ways this data is used:  with and without "compiling"
- *   Compiling means: adding subtypes of types and adding all features of a type
- *   Uncompiled form is called ORIGINAL.
- *   
- *   Compiling is deferred - until the first reference to containsType or Feature.
+ * Result Specifications (result specs, rs) are closely tied to capability specifications.
+ * 
+ * They consist of instances of
+ *   TypeOrFeatures and associated languages for which they are set.
  *   
- * Many instances of this class are made, sometimes via cloning.
+ * This impl supports removing previously added types and features
+ * for particular languages.
  * 
- * Sometimes types and features are deleted - the intent is to do this operation on the
- * uncompiled form, and then "recompile" it.
+ * There are two forms of the data kept:
+ *   The data as it was provided to set the items in the result spec
+ *     This form is used when removing previously added things
+ *     
+ *   The data after a type system has been provided, expanded to cover
+ *     the various implied settings, due to either
+ *       all Features flag on a type or
+ *       the type/subtype hierarchy in the type system
+ *         
+ *   TypesOrFeatures are:
+ *     typeXXX:FeatureYYY - specifying a particular feature of a type
+ *       (Corner case: typeXXX:FeatureYYY doesn't imply there's a
+ *                     typeXXX allFeat nor a
+ *                     typeXXX w/o allFeat.)
+ *                     
+ *     typeXXX with allFeatures - a shorthand for specifying
+ *       typeXXX and  
+ *       typeXXX:FeatureYYY for all features YYY defined for typeXXX
+ *         (Corner case: excludes features ZZZ defined only in subtype of typeXXX)
+ *     typeXXX without allFeatures (w/o allFeat) - specifies a type, but says nothing about the features
+ *       This is specifiable in the XML.  It means:
+ *         The type is produced/needed but there's no information about the features that
+ *           are to be produced or used      
  * 
- * Types and Features are kept on a per-language basis.  Language can include a special value,
- * x-unspecified, which "matches" any other language.
+ *       containsType typeXXX  
+ *         returns true if typeXXX is in the RS, with or without the allFeats flag
+ *         returns false if only features involving typeXXX are specified
+ *         
+ *  Intersection is done on fully expanded representations.     
  * 
- * Language specifications are simplified to eliminate the country part.  All refs to 
- * test if a type or feature is in the result spec for a language uses the simplified language.
+ * There are two kinds of inheritance used
+ *   Assuming there's a type system (which must be present when intersection is used), there's type/subtype
+ *     This means that if a resultSpec is set for typeXXX, then the containsType(typeYYY) 
+ *     returns true if typeYYY is a subtype of typeXXX.
+ *     This also needs to work for typeXXX:featZZZ; containsFeature(typeYYY:featZZZ)
+ *     returns true if type YYY is a subtype of typeXXX.
+ *     
+ *   Languages have a 3 level hierarchy:
+ *     x-unspecified - the same as no language being specified.
+ *       If the resultSpec contains typeXXX for language x-unspecified,
+ *       containsType(typeXXX, languageLLL) returns true, for any languageLLL
+ *     a "base" language, without a '-', e.g. "en"
+ *     a sub-language, with one or more '-', e.g., "en-us"
+ *     
+ *     The rules for matching languages only handle these three levels of inheritance.
+ *       (Corner case: 3 or more level language hierarchy are treated as 3 level hierarchies 
+ *        eg. zh-Hant-HK (Traditional Chinese as used in Hong Kong)
+ *        See http://www.w3.org/International/articles/language-tags/Overview.en.php )
  * 
- * Set operations are done to combine, for a particular type or feature, the languages for which it is valid.
- * This is a Union operation
- * Set operations are done to union the input types/features with the output types/features when computing the default
- * result-spec for an aggregate.
- * Set operations are done to intersect the result spec with the output capabilities of a component.
+ * Design considerations and assumptions
+ *   Many instances of this class are made, sometimes via cloning.
+ *   Most uses only use types, not type:features
+ *   Most don't use languages
+ *   A small subset of the possible types and type:features is specified explicitly
+ *   Sometimes types and/or features are deleted. (language capability flow deletes types and/or features)
  * 
- * Languages are represented as integers; there is a hash table from the string to the integer, and
- * an array to go from integer to lang string.
+ * Types and Features are kept on a per-language basis.  Language can include a special value,
+ * x-unspecified, which "matches" any other language.
  * 
- * A result set of ORIGINALs consists of types/features with associated language sets. 
+ * Set operations among different result specs:
+ *   Union: done in aggregates over result-specs derived from input capabilities of delegates
+ *   Intersection: done for primitive components, over result-spec derived from output capability of the primitive
+ *   remove: one type or feature (used by language capability flow)
+ *     (Corner cases
+ *        removing typeXXX doesn't remove typeXXX:featureYYY
+ *        removing typeXXX allFeat doesn't remove typeXXX w/o allFeat (may have different languages)
+ *        removing typeXXX w/o allFeat doesn't remove typeXXX allFeat)
+ *        
+ * The compiled version is used in containsType, containsFeature testing, and is used when
+ * computing intersection.
  */
 
 public final class ResultSpecification_impl extends MetaDataObject_impl implements
@@ -74,131 +128,41 @@ public final class ResultSpecification_i
 
   private static final long serialVersionUID = 8516517600467270594L;
 
-  private static final int UNSPECIFIED_LANGUAGE_INDEX = 0;
-
   /**
    * main language separator e.g 'en' and 'en-US'
-   */
-  private static final char LANGUAGE_SEPARATOR = '-';
-  
-  private class ToF_Languages implements Cloneable {
-    public TypeOrFeature tof;
-    public BitSet languages;
-    
-    ToF_Languages(TypeOrFeature aTof, String[] aLanguages) {
-      tof = aTof;
-      languages = new BitSet();
-      for (String lang : aLanguages) {
-        languages.set(getLanguageIndex(lang));
-      }
-    }
-    
-    ToF_Languages(TypeOrFeature aTof, BitSet aLanguages) {
-      tof = aTof;
-      languages = aLanguages;
-    }
-    
-    public Object clone() {
-      return new ToF_Languages((TypeOrFeature) tof.clone(), (BitSet)languages.clone());
-    }
-
-    @Override
-    public int hashCode() {
-      final int prime = 31;
-      int result = 1;
-      result = prime * result + ((languages == null) ? 0 : languages.hashCode());
-      result = prime * result + ((tof == null) ? 0 : tof.hashCode());
-      return result;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (this == obj)
-        return true;
-      if (obj == null)
-        return false;
-      if (getClass() != obj.getClass())
-        return false;
-      final ToF_Languages other = (ToF_Languages) obj;
-      if (languages == null) {
-        if (other.languages != null)
-          return false;
-      } else if (!languages.equals(other.languages))
-        return false;
-      if (tof == null) {
-        if (other.tof != null)
-          return false;
-      } else if (!tof.equals(other.tof))
-        return false;
-      return true;
-    }
-    
+   */  
     
-  }
-
-  private boolean needsCompilation = true;
-  
-  private final Map<String, Integer> lang2int; 
-  
+  private static final String[] ARRAY_X_UNSPEC = new String[]{Language.UNSPECIFIED_LANGUAGE};
   
   /**
-   * hash map used to map fully qualified type and feature names to associated
-   * ToF_Languages instances.  This used for ORIGINAL types and features.
-   * 
-   * Another hash map is used for compiled types and features - these include
-   * the subtypes of the ORIGINAL types.  We keep the originals because the
-   * operations of adding and removing types and features are done with respect
-   * to the originals, only, and then the other map for compiled types is recomputed.
-   * 
-   * A case in particular: we need to be able to distinguish which types were
-   * originally marked allAnnotatorFeatures, versus those types which were
-   * added because they were subtypes.  The corner case happens when a type is both
-   * an original and is also an added-via-subtype, where the allAnnotatorFeatures
-   * flag of the original is not set but the subtype version is set.
-   * 
+   * form used in hash table of compiled version to represent x-unspecified
+   * (can't use null - that means entry not in table)
    */
-  private final Map<String, ToF_Languages> name2tof_langs;
-
+  private static final RsLangs compiledXunspecified = RsLangs.createSharableEmpty();  // a distinct object
+  
   /**
-   * hash map used to map fully qualified type and feature names to associated
-   * ToF_Languages instances.  This used for COMPILED types and features.
+   * used for empty type subsumption lists in subtype iterator
    */
-
-  private final Map<String, ToF_Languages> withSubtypesName2tof_langs;
-  
-//  /**
-//   * Map from TypeOrFeature objects to HashSets that include the language codes (Strings) for which
-//   * that type or feature should be produced.
-//   */
-//  private Map<TypeOrFeature, Set<String>> mTypesAndFeatures = new HashMap<TypeOrFeature, Set<String>>();
-//
-//  /**
-//   * Map from String type or feature names to HashSets that include the language codes (Strings) for
-//   * which that type or feature should be produced. This is populated by the compile() method, and
-//   * includes subtypes as well as the individual feature names for types that have
-//   * allAnnotatorFeatures=true.
-//   */
-//  private final Map<String, Set<String>> mCompiledNameToLanguageMap = 
-//                                   new HashMap<String, Set<String>>();
+  public static final List<Type> EMPTY_TYPE_LIST = new ArrayList<Type>(0);
   
   /**
-   * Default language set to use if nothing else is specified
+   * For this Result-specification, the collection of language-sets
+   * Uncompiled format
    */
-  private static final String[] UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1 = new String[] {Language.UNSPECIFIED_LANGUAGE};
-
+  private final RsTypesMap rsTypesMap;
+  
   /**
    * The type system used to compute the subtypes and allAnnotatorFeatures of types
    */
-  private TypeSystem mTypeSystem;
+  private TypeSystem mTypeSystem = null;
+
+  // compiled forms
+  private boolean needsCompilation = true;
+  private final Map<String, RsLangs> rsCompiled;
 
-  /**
-   * constructor:  init the default languge set with the language x-unspecified
-   */
   public ResultSpecification_impl() {
-    name2tof_langs = new HashMap<String, ToF_Languages>();
-    withSubtypesName2tof_langs = new HashMap<String, ToF_Languages>();
-    lang2int = new HashMap<String, Integer>();
-    lang2int.put(Language.UNSPECIFIED_LANGUAGE, 0); 
+    rsTypesMap = new RsTypesMap();
+    rsCompiled = new HashMap<String, RsLangs>();
   }
 
   /**
@@ -213,294 +177,109 @@ public final class ResultSpecification_i
     this();
     mTypeSystem = aTypeSystem;
   }
-  
-  private ResultSpecification_impl(ResultSpecification_impl original) {
-    name2tof_langs = new HashMap<String, ToF_Languages>(original.name2tof_langs.size());
-    withSubtypesName2tof_langs = new HashMap<String, ToF_Languages>(original.withSubtypesName2tof_langs.size());
-    
-    // don't share this - unless prove there are no multi-tasking interlocks possible
-    lang2int = new HashMap<String, Integer>(original.lang2int);
-    
-    for (Map.Entry<String, ToF_Languages> entry : original.name2tof_langs.entrySet()) {
-      ToF_Languages tof_langs = entry.getValue();
-      
-      // note: TypeOrFeature instances are not cloned, but shared
-      //   If they are modified, things may break
-      name2tof_langs.put(entry.getKey(), 
-          new ToF_Languages(tof_langs.tof, (BitSet)(tof_langs.languages.clone())));
-    }
-    mTypeSystem = original.mTypeSystem;
-  }
-
-  private int getBaseLanguageIndex(String language) {
-    return getLanguageIndex(getBaseLanguage(language));
-  }
-  
-  private int getLanguageIndex(String language) {
-    Integer r = lang2int.get(language);
-    if (null == r) {
-      int i = lang2int.size();
-      lang2int.put(language, Integer.valueOf(i));
-      return i;
-    }
-    return r.intValue();
-  }
 
-  private void compileIfNeeded() {
-    if (needsCompilation) {
-      compile();
-    }
-  }
-  
-  private static String getBaseLanguage(String language) {
-    String baseLanguage = language;
-    int index = language.indexOf(LANGUAGE_SEPARATOR);
-    if (index > -1) {
-      baseLanguage = language.substring(0, index);
-    }
-    return baseLanguage;
-  }
-  
   /**
-   * @see org.apache.uima.analysis_engine.ResultSpecification#getResultTypesAndFeatures()
+   * copies the result spec passed in so that updates to it
+   *   don't affect the original
+   * @param original
    */
-  public TypeOrFeature[] getResultTypesAndFeatures() {
-    TypeOrFeature[] arr = new TypeOrFeature[name2tof_langs.size()];
-    int i = 0;
-    for (ToF_Languages tof_langs : name2tof_langs.values()) {
-      arr[i++] = tof_langs.tof;
-    }
-    return arr;
-  }
-  
-  private Map<String, ToF_Languages> availName2tof_langs() {
-    if (needsCompilation) {
-      return name2tof_langs;
+  private ResultSpecification_impl(ResultSpecification_impl original) {
+    mTypeSystem = original.mTypeSystem;    // not cloned
+    rsTypesMap = new RsTypesMap(original.rsTypesMap);
+    needsCompilation = original.needsCompilation;
+    rsCompiled = new HashMap<String, RsLangs>(original.rsCompiled);     
+    for (Map.Entry<String, RsLangs> e : rsCompiled.entrySet()) {
+      e.getValue().setShared();
     }
-    return withSubtypesName2tof_langs;
   }
-
+      
   /**
-   * return the set of languages for this type or feature, or null if no such type/feature
+   * @see org.apache.uima.analysis_engine.ResultSpecification#getResultTypesAndFeatures()
    */
-  private ToF_Languages getLanguagesForTypeOrFeature(String typeOrFeature) {
-    boolean isType = typeOrFeature.indexOf(TypeSystem.FEATURE_SEPARATOR) == -1;
-    Map<String, ToF_Languages> tofMap = (isType) ? availName2tof_langs() : name2tof_langs;
-    return tofMap.get(typeOrFeature);
-  }
+  public TypeOrFeature[] getResultTypesAndFeatures() {
+    return getResultTypesAndFeatures(true, null);
+  }    
   
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#getResultTypesAndFeatures(java.lang.String)
+   * May contain near-duplicates - same type, but with different settings of allannotatorfeatures
+   *   (only if they have different languages)
    */
   public TypeOrFeature[] getResultTypesAndFeatures(String language) {
-    
-    int languageIndex = getLanguageIndex(language);
-    int baseLanguageIndex = getBaseLanguageIndex(language);
-
-    // holds the found ToFs for the specified language
-    List<TypeOrFeature> foundToF = new ArrayList<TypeOrFeature>();
-
-    for (Map.Entry<String, ToF_Languages> entry : name2tof_langs.entrySet()) {
-      if (languageMatches(entry.getValue(), languageIndex, baseLanguageIndex)) {
-        foundToF.add(entry.getValue().tof);
-      }
-    }
-    return foundToF.toArray(new TypeOrFeature[foundToF.size()]);
+    return getResultTypesAndFeatures(false, language);
   }
-
-  // private helper functions
   
-//  private boolean sameLanguages(String [] s, BitSet b) {
-//    if (s.length != b.cardinality()) {
-//      return false;
-//    }
-//    for (String lang : s) {
-//      if ( ! b.get(getLanguageIndex(lang))) {
-//        return false;
-//      }
-//    }
-//    return true;
-//  }
-  
-  /**
-   * change null languages to the unspecified language
-   * change a set of languages that includes the unspecified language to
-   *   just the unspecified language.  
-   *   This is OK when storing things into a result spec, since
-   *     the unspecified language will match any query.
-   *   This doesn't apply for querying because the queries only 
-   *     specify one language, not a set 
-   */
-  private String [] normalizeLanguages(String [] languages) {   
-    if (null == languages) {
-      return UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1;
-    } else {
-      for (String lang : languages) {
-        if (lang.equals(Language.UNSPECIFIED_LANGUAGE)) {
-          return UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1;
+  private TypeOrFeature[] getResultTypesAndFeatures(boolean skipLanguageFilter, String language) {
+    List<TypeOrFeature> r = new ArrayList<TypeOrFeature>();
+    if ((rsTypesMap == null || rsTypesMap.nbrOfTypes() == 0) && needsCompilation == false) {
+      // being called on results of intersection
+      // probably by a test case, not a normal call
+      // attempt to construct a plausible representation
+      reconstructRsTypesFromCompiled();
+    }
+    for (RsType t : rsTypesMap) {
+      if (t.isAllFeatures && (skipLanguageFilter || RsLangs.subsumes(t.languagesAllFeat, language))) {
+        r.add(createTypeOrFeature(t.typeName, true, true));
+      }
+      if (t.isSpecified && (skipLanguageFilter || RsLangs.subsumes(t.languagesNotAllFeat, language))) {
+        if (!(t.isAllFeatures && t.languagesAllFeat.equals(t.languagesNotAllFeat)))  // don't make a duplicate
+        r.add(createTypeOrFeature(t.typeName, true, false));
+      }
+      if (t.features != null) {
+        for (RsFeat f : t.features) {
+          if (skipLanguageFilter || f.subsumes(language))
+            r.add(createTypeOrFeature(t.typeName, f.shortFeatName));
         }
       }
     }
-    // normalization is expensive - so do this once as part of parsing capabilities
-//    int i = 0;
-//    for (String language : languages) {
-//      languages[i++] = normalizeLanguage(language);
-//    }
-    return languages;  
+    return r.toArray(new TypeOrFeature[r.size()]);
   }
-  
-//  private String normalizeLanguage(String language) {
-//    String result = language.toLowerCase(Locale.ENGLISH);  // language specs are in English locale
-//    return result.replace('_', '-');
-//  }
-  
-  private void setNeedsCompilation() {
-    needsCompilation = true;
-    if (0 != withSubtypesName2tof_langs.size()) {
-      withSubtypesName2tof_langs.clear();
-    }
-  }
-  
-  private void addTypeOrFeatureInternal(TypeOrFeature tof, String[] languages) {
-    languages = normalizeLanguages(languages);
-    
-    ToF_Languages tof_langs = name2tof_langs.get(tof.getName());
-    if (null == tof_langs) {
-      name2tof_langs.put(tof.getName(), new ToF_Languages(tof, languages));
-      setNeedsCompilation();
-      return;
-    }
-    tof_langs.tof.setAllAnnotatorFeatures(tof.isAllAnnotatorFeatures());
-    BitSet langBitSet = tof_langs.languages;
-    langBitSet.clear();
-    for (String lang : languages) {
-      langBitSet.set(getLanguageIndex(lang));
-    }
-    setNeedsCompilation();
-  }
-  
-  /**
-   * Create an entry in this result spec from the type or feature and its languages
-   * @param tofLangs
-   */
-  private void addClonedToF_Languages(ToF_Languages tofLangs, ResultSpecification_impl rs) {
-    List<String> languages = new ArrayList<String>();
-    BitSet bs = tofLangs.languages;
-    for (Map.Entry<String, Integer> si : rs.lang2int.entrySet()) {
-      if (bs.get(si.getValue())) {
-        languages.add(si.getKey());
-      }
-    }
-    
-    ToF_Languages n = new ToF_Languages(
-        tofLangs.tof, 
-        languages.toArray(new String[languages.size()]));
-    name2tof_langs.put(n.tof.getName(), n);
-    setNeedsCompilation();
-  }
-
-  private TypeOrFeature createTypeOrFeature(String name, boolean isType, boolean aAllAnnotatorFeatures) {
-    TypeOrFeature r = new TypeOrFeature_impl();
-    r.setType(isType);
-    r.setName(name);
-    if (isType) {
-      r.setAllAnnotatorFeatures(aAllAnnotatorFeatures);
-    }
-    return r;
-  }
-
-  private void addResultTypeOrFeatureAddLanguage(String name, boolean isType, boolean allAnnotatorFeatures, String[] languages) {
-
-    ToF_Languages tof_langs = name2tof_langs.get(name);
-    
-    if (null == tof_langs) {
-      addTypeOrFeatureInternal(createTypeOrFeature(name, isType, allAnnotatorFeatures), languages);
-      setNeedsCompilation();
-      return;
-    }
-    
-    // tof_langs entry for this name exists, so update it
-    addResultTypeOrFeatureAddLanguageCommon(tof_langs, allAnnotatorFeatures, languages);
-  } 
-  
-  private void addResultTypeOrFeatureAddLanguage(TypeOrFeature tof, String[] languages) {
-
-    ToF_Languages tof_langs = name2tof_langs.get(tof.getName());
-    
-    if (null == tof_langs) {
-      addTypeOrFeatureInternal(tof, languages);
-      setNeedsCompilation();
-      return;
-    }
-    
-    addResultTypeOrFeatureAddLanguageCommon(tof_langs, tof.isAllAnnotatorFeatures(), languages);
-  }
-  
-  private void addResultTypeOrFeatureAddLanguageCommon(
-      ToF_Languages tof_langs, boolean allAnnotatorFeatures, String [] languages) {
 
-    // tof_langs entry for this name exists, so update it
-    if (allAnnotatorFeatures) {
-      if (!tof_langs.tof.isAllAnnotatorFeatures()) {
-        tof_langs.tof.setAllAnnotatorFeatures(true);
-        setNeedsCompilation();
+  private void reconstructRsTypesFromCompiled() {
+    // First, recompute basic rsTypes and rsFeatures hooked to types
+    for (Entry<String, RsLangs> e : rsCompiled.entrySet()) {
+      String tofName = e.getKey();
+      int b = tofName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+      if (b == -1) {
+        rsTypesMap.add(tofName, false, e.getValue(), false);  
+      } else {
+        String typeName = tofName.substring(0, b);
+        String featName = tofName.substring(b+1);
+        rsTypesMap.add(typeName, featName, e.getValue(), false);
       }
     }
-
-    // update the languages by adding the new languages passed in
-    languages = normalizeLanguages(languages);
-    BitSet langBitSet = tof_langs.languages;
     
-    // "==" ok here due to normalizeLanguages call above
-    if (languages == UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1) {
-      if ( ! langBitSet.get(UNSPECIFIED_LANGUAGE_INDEX)) {
-        langBitSet.clear();
-        langBitSet.set(UNSPECIFIED_LANGUAGE_INDEX);
-        setNeedsCompilation();
+    // Second merge 
+    //   if the types features all have the same lang and are all the features,
+    //      set the allFeats flag, and merge in the langs
+    for (RsType t : rsTypesMap) {
+      if (t.hasAllFeaturesExplicitly(mTypeSystem) && t.allFeaturesHaveSameLangs()) {
+        t.isAllFeatures = true;
+        RsLangs l = t.features.features.get(0).languages;
+        if (l != null && RsLangs.isEmpty(l)) {
+          l = null;
+        }
+        if (l != null) {
+          if (t.languagesAllFeat == null) {
+            t.languagesAllFeat = RsLangs.createOrNull(l);
+          } else {  // merge in langs l
+            t.languagesAllFeat = RsLangs.addAll(t.languagesAllFeat, l);
+          }
+        }        
+        t.features = null;
       }
-      return;
-    } 
- 
-    // languages set already exists; add new ones to existing set
-    for (String lang : languages) {
-      langBitSet.set(getLanguageIndex(lang));
-    }
-    setNeedsCompilation();
-  }
-  
-  /**
-   * version used by compile to add subtypes
-   * @param aTypeName
-   * @param aAllAnnotatorFeatures
-   * @param languages
-   */
-  private void addResultType(String name, boolean allAnnotatorFeatures, BitSet languages) {
-    ToF_Languages tof_langs = withSubtypesName2tof_langs.get(name);
-    
-    if (null == tof_langs) {
-      withSubtypesName2tof_langs.put(
-          name, 
-          new ToF_Languages(createTypeOrFeature(name, true, allAnnotatorFeatures), (BitSet)languages.clone()));
-      return;
-    }
-
-    // tof_langs entry for this name exists, so update it
-    if (allAnnotatorFeatures) {
-      if (!tof_langs.tof.isAllAnnotatorFeatures()) {
-        tof_langs.tof.setAllAnnotatorFeatures(true);
+      if (t.isSpecified && t.isAllFeatures && equalsOrBothNull(t.languagesAllFeat, t.languagesNotAllFeat)) {
+        t.isSpecified = false;
+        t.languagesNotAllFeat = null;
       }
-    }
-
-    // update the languages by adding the new languages passed in
-    tof_langs.languages.or(languages); 
+    }    
   }
-
-   
+  
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#setResultTypesAndFeatures(org.apache.uima.analysis_engine.TypeOrFeature[])
    */
   public void setResultTypesAndFeatures(TypeOrFeature[] aTypesAndFeatures) {
-    setResultTypesAndFeatures(aTypesAndFeatures, UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+    setResultTypesAndFeatures(aTypesAndFeatures, ARRAY_X_UNSPEC);
   }
   
   /**
@@ -508,34 +287,51 @@ public final class ResultSpecification_i
    *      java.lang.String[])
    */
   public void setResultTypesAndFeatures(TypeOrFeature[] aTypesAndFeatures, String[] aLanguageIDs) {
-    name2tof_langs.clear();
+       
     for (TypeOrFeature tof : aTypesAndFeatures) {
-      name2tof_langs.put(tof.getName(), new ToF_Languages(tof, normalizeLanguages(aLanguageIDs)));
-    }    
-    setNeedsCompilation();
+      addResultTof(tof, aLanguageIDs, true);
+    }
   }
-
+    
+  private void addResultTof(TypeOrFeature tof, String[] langs, boolean replace) {
+    String name = tof.getName();
+    String typeName = null;
+    String shortFeatName = null;
+    int i = name.indexOf(TypeSystem.FEATURE_SEPARATOR);
+    if (i < 0) {
+      typeName = name;
+      rsTypesMap.add(typeName, tof.isAllAnnotatorFeatures(), langs, replace);
+    } else {
+      typeName = name.substring(0, i);
+      shortFeatName = name.substring(i+1);
+      rsTypesMap.add(typeName, shortFeatName, langs, replace);
+    }
+    setCompileNeeded();
+  }
+  
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#addResultTypeOrFeature(org.apache.uima.analysis_engine.TypeOrFeature)
    */
   public void addResultTypeOrFeature(TypeOrFeature aTypeOrFeature) {
-    addTypeOrFeatureInternal(aTypeOrFeature, UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+    addResultTypeOrFeature(aTypeOrFeature, ARRAY_X_UNSPEC);
   }
 
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#addResultTypeOrFeature(org.apache.uima.analysis_engine.TypeOrFeature,
    *      java.lang.String[])
+   *      
+   * Note: the Javadoc assumes there is one tof per type, but this design allows 2 (one with allAnnotatorFeatures set, and one without).
    */
-  public void addResultTypeOrFeature(TypeOrFeature aTypeOrFeature, String[] aLanguageIDs) {
-    addTypeOrFeatureInternal(aTypeOrFeature, aLanguageIDs);
-  }
+  public void addResultTypeOrFeature(TypeOrFeature tof, String[] languages) {
+    addResultTof(tof, languages, true); 
+ }
   
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#addResultType(java.lang.String,
    *      boolean)
    */
   public void addResultType(String aTypeName, boolean aAllAnnotatorFeatures) {
-    addTypeOrFeatureInternal(createTypeOrFeature(aTypeName, true, aAllAnnotatorFeatures), UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+    addResultType(aTypeName, aAllAnnotatorFeatures, ARRAY_X_UNSPEC);
   }
   
   /**
@@ -543,14 +339,15 @@ public final class ResultSpecification_i
    *      boolean, java.lang.String[])
    */
   public void addResultType(String aTypeName, boolean aAllAnnotatorFeatures, String[] aLanguageIDs) {
-    addResultTypeOrFeatureAddLanguage(aTypeName, true, aAllAnnotatorFeatures, aLanguageIDs);
+    rsTypesMap.add(aTypeName, aAllAnnotatorFeatures, aLanguageIDs, false);
+    setCompileNeeded();
   }
   
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#addResultFeature(java.lang.String)
    */
   public void addResultFeature(String aFullFeatureName) {
-    addResultFeature(aFullFeatureName, UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1);
+    addResultFeature(aFullFeatureName, ARRAY_X_UNSPEC);
   }
 
   /**
@@ -558,112 +355,39 @@ public final class ResultSpecification_i
    *      java.lang.String[])
    */
   public void addResultFeature(String aFullFeatureName, String[] aLanguageIDs) {
-    addResultTypeOrFeatureAddLanguage(aFullFeatureName, false, false, aLanguageIDs);
+    String typeName = null;
+    String shortFeatName = null;
+    int i = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+    typeName = aFullFeatureName.substring(0, i);
+    shortFeatName = aFullFeatureName.substring(i+1);
+    rsTypesMap.add(typeName, shortFeatName, aLanguageIDs, false);
+    setCompileNeeded();
   }
 
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#compile(org.apache.uima.cas.TypeSystem)
+   * @deprecated no longer needed, remove call to this
    */
+  @Deprecated
   public void compile(TypeSystem aTypeSystem) {
     setTypeSystem(aTypeSystem);
-    compileIfNeeded();
+    compile();
   }
-  
-//  private static class TypeToCompile {
-//    String name;
-//    boolean allFeatures;
-//    String[] languages;
-//    TypeToCompile(String aName, boolean aAllFeatures, String[] aLanguages) {
-//      name = aName;
-//      allFeatures = aAllFeatures;
-//      languages = aLanguages;
-//    }
-//  }
-  
-  private void compile() { 
-    if (null == mTypeSystem) {
-      return;
-    }
-    
-    needsCompilation = false;
-    // get set of current type names
-    // for each name, get set of implied additional names (allAnnotatorFeatures and subtypes), recursively
-    // add with languages
     
-    // issue:  can a result spec hold for language 1 types a b c, for language 2 types a b? yes
-    //         can it hold for lang 1 type a(allfeats) and for lang 2 type a(not all feat)? no
-    
-//    Map<String, TypeToCompile> typesToCompile = new HashMap<String, TypeToCompile>(mNameToTofLang.size());
-//    for (ToF_Languages tof_langs : mNameToTofLang.values()) {
-//      TypeOrFeature tof = tof_langs.tof;
-//      if (tof.isType()) {
-//        String typeName = tof.getName();
-//        typesToCompile.put(typeName, new TypeToCompile(typeName, tof.isAllAnnotatorFeatures(), tof_langs.languages));
-//      }
-//    }
-
-    for (ToF_Languages tof_langs : name2tof_langs.values()) {
-        TypeOrFeature tof = tof_langs.tof;
-        
-        addResultType(tof.getName(), tof.isAllAnnotatorFeatures(), tof_langs.languages);
-        
-        if (tof.isType()) {
-          compileTypeRecursively(mTypeSystem.getType(tof.getName()), tof.isAllAnnotatorFeatures(), tof_langs.languages);
-        }
+  private TypeOrFeature createTypeOrFeature(String name, boolean isType, boolean aAllAnnotatorFeatures) {
+    TypeOrFeature r = new TypeOrFeature_impl();
+    r.setType(isType);
+    r.setName(name);
+    if (isType) {
+      r.setAllAnnotatorFeatures(aAllAnnotatorFeatures);
     }
+    return r;
   }
-    
-//    mCompiledNameToLanguageMap.clear();
-//    for (Map.Entry<TypeOrFeature, Set<String>> elem : mTypesAndFeatures.entrySet()) {
-//      TypeOrFeature tof = elem.getKey();
-//      if (tof.isType()) {
-//        Type t = aTypeSystem.getType(tof.getName());
-//        if (t != null) {
-//          addTypeRecursive(t, aTypeSystem, elem.getValue(), tof.isAllAnnotatorFeatures());
-//        }
-//      } else { // feature
-//        mCompiledNameToLanguageMap.put(tof.getName(), elem.getValue());
-//      }
-//    }
-//    // TODO: process the set of intersections
-//  }
-
-  private void compileTypeRecursively(Type type, boolean allFeatures, BitSet languages) {
-
-    if (null != type) {
-//      if (allFeatures) {
-//        for (Feature f : (List<Feature>) type.getFeatures()) {
-//          addResultFeature(f.getName(), languages); // this add "merges"
-//                                                    // langauges with existing
-//                                                    // ones
-//        }
-//      }
-      
-      for (Type subType : (List<Type>) mTypeSystem.getDirectSubtypes(type)) {
-        String subTypeName = subType.getName();
-        addResultType(subTypeName, allFeatures, languages);
-        compileTypeRecursively(subType, allFeatures, languages);
-      }
-    }
+  
+  private TypeOrFeature createTypeOrFeature(String typeName, String featureName) {
+    return createTypeOrFeature(typeName + TypeSystem.FEATURE_SEPARATOR + featureName, false, false);
   }
   
-// /**
-// * @param t
-// */
-//  private void addTypeRecursive(Type type, TypeSystem typeSystem, Set<String> languages,
-//          boolean allFeatures) {
-//    mCompiledNameToLanguageMap.put(type.getName(), languages);
-//    if (allFeatures) {
-//      for (Feature f : (List<Feature>)type.getFeatures()) {
-//        mCompiledNameToLanguageMap.put(f.getName(), languages);
-//      }
-//    }
-//    // recurse on subtypes
-//    for (Type subtype : (List<Type>)typeSystem.getDirectSubtypes(type)) {
-//      addTypeRecursive(subtype, typeSystem, languages, allFeatures);
-//    }
-//  }
-
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#containsType(java.lang.String)
    */
@@ -673,16 +397,27 @@ public final class ResultSpecification_i
 
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#containsType(java.lang.String,java.lang.String)
+   * method:
+   *   
+   *   for each type (with all-feat, without all-feat):
+   *     for each type, and supertypes 
+   *       check if one of the resultSpec languages subsumes the given language.
+   *         if so, return true
+   *   return false;
+   *   
+   *   But: cache this: key = int[2]: type#, langi#, value = true/false
+   *   
    */
-  public boolean containsType(String aTypeName, String language) {
-    language = Language.normalize(language);
-
-    if (aTypeName.indexOf(TypeSystem.FEATURE_SEPARATOR) != -1)
+  
+  // TODO check cache, normalize language
+  public boolean containsType(String aTypeName, String aLanguage) {
+    if (aTypeName.indexOf(TypeSystem.FEATURE_SEPARATOR) != -1) {
       return false; // check against someone passing a feature name here
-    
+    }
     compileIfNeeded();
-    return languageMatches(availName2tof_langs().get(aTypeName), language);
+    return hasLanguage(rsCompiled.get(aTypeName), aLanguage);
   }
+
   
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#containsFeature(java.lang.String)
@@ -690,95 +425,44 @@ public final class ResultSpecification_i
   public boolean containsFeature(String aFullFeatureName) {
     return containsFeature(aFullFeatureName, Language.UNSPECIFIED_LANGUAGE);
   }
-  
-  
-//    int typeEndPosition = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
-//    if (typeEndPosition == -1)
-//      return false; // check against someone passing a type name here
-//
-//    compileIfNeeded();
-//    if (availName2tof_langs().containsKey(aFullFeatureName)) {
-//      return true;
-//    }
-    
-    // special code here to return true if the allAnnotatorFeatures flag is set for the type
-//    String typeName = aFullFeatureName.substring(0, typeEndPosition);
-//    ToF_Languages tof_langs = availName2tof_langs().get(typeName);
-//    if (null != tof_langs && tof_langs.tof.isAllAnnotatorFeatures()) {
-//      if (null != mTypeSystem) {
-//        return null != mTypeSystem.getFeatureByFullName(aFullFeatureName);  // verify feature is there
-//      }
-//      return true;
-//    }
-//    return false;
-//  }
 
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#containsFeature(java.lang.String,java.lang.String)
    */
-  public boolean containsFeature(String aFullFeatureName, String language) {
-    language = Language.normalize(language);
-    int typeEndPosition = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
-    if (typeEndPosition == -1)
-      return false; // check against someone passing a type name here
 
+  public boolean containsFeature(String aFullFeatureName, String aLanguage) {
+    int i = aFullFeatureName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+    if (i == -1)
+      return false; // check against someone passing a type name here
     compileIfNeeded();
-    ToF_Languages tof_langs = name2tof_langs.get(aFullFeatureName);
-    if (languageMatches(tof_langs, language)) {
+    boolean found = hasLanguage(rsCompiled.get(aFullFeatureName), aLanguage);
+    if (found) {
       return true;
     }
-    
-    // special code for allAnnotatorFeatures: return true if type name is found and
-    // has all annotator features set
-    tof_langs = availName2tof_langs().get(aFullFeatureName.substring(0, typeEndPosition));
-    if (null != tof_langs && tof_langs.tof.isAllAnnotatorFeatures() && languageMatches(tof_langs, language)) {
-      if (null != mTypeSystem) {
-        return null != mTypeSystem.getFeatureByFullName(aFullFeatureName);  // verify feature is there
-      }
+    // this next bit keeps the behavior unchanged for the case where
+    // the type system isn't specified.
+    RsType t = rsTypesMap.getRsType(aFullFeatureName.substring(0, i)); // look for just the type name
+    if (null != t && t.isAllFeatures && RsLangs.subsumes(t.languagesAllFeat, aLanguage)) {
       return true;
     }
     return false;
   }
 
   /**
-   * Languages matches if the query language is xxx-yyy and
-   *    result spec languages contains:
-   *       x-unspecified
-   *       xxx-yyy
-   *       xxx  
-   *    
-   * @param tof_langs
+   * 
+   * @param rsLangs
    * @param language
-   * @return
+   * @return  
    */
-  private boolean languageMatches(ToF_Languages tof_langs, String language) {
-    if (null == tof_langs) {
-      return false;
-    }    
-    BitSet languages = tof_langs.languages;
-    if (languages.get(UNSPECIFIED_LANGUAGE_INDEX) ||
-        languages.get(getLanguageIndex(language))) {
-      return true;
-    }
-    String baseLanguage = getBaseLanguage(language);
-    return baseLanguage != language &&  // the != means the base language is different from the language
-                                        // != is OK here
-           languages.get(getLanguageIndex(baseLanguage));
+  private static boolean hasLanguage(RsLangs rsLangs, String language) {
+    language = Language.normalize(language);
+    // rsLangs == null means there was no entry in the 
+    //   rsCompiled map for this type
+    //   It does NOT mean x-unspecified
+    return (rsLangs == null) ? false : (RsLangs.subsumes(rsLangs, language));
   }
 
-  private boolean languageMatches(ToF_Languages tof_langs, int languageIndex, int baseLanguageIndex) {
-    if (null == tof_langs) {
-      return false;
-    }    
-    BitSet languages = tof_langs.languages;
-    if (languages.get(UNSPECIFIED_LANGUAGE_INDEX) ||
-        languages.get(languageIndex)) {
-      return true;
-    }
-    return baseLanguageIndex != languageIndex && 
-           languages.get(baseLanguageIndex);
-  }
-  
+
   /**
    * @see org.apache.uima.resource.impl.MetaDataObject_impl#getXmlizationInfo()
    */
@@ -805,25 +489,36 @@ public final class ResultSpecification_i
     }
     for (Capability capability : capabilities) {
       TypeOrFeature[] tofs = outputs ? capability.getOutputs() : capability.getInputs();
-      String[] supportedLanguages = capability.getLanguagesSupported();
-      if (null == supportedLanguages ||
-          supportedLanguages.length == 0) {
-        supportedLanguages = UNSPECIFIED_LANGUAGE_IN_ARRAY_OF_1;
-      }
+      
       for (TypeOrFeature tof : tofs) {
-        addResultTypeOrFeatureAddLanguage(tof, supportedLanguages);
+        String typeName = tof.getName();
+        if (!tof.isType()) {
+          int i = typeName.indexOf(TypeSystem.FEATURE_SEPARATOR);
+          String shortFeatName = typeName.substring(i+1);
+          typeName = typeName.substring(0, i);
+          rsTypesMap.add(typeName, shortFeatName, capability.getLanguagesSupported(), false);
+        } else {
+          rsTypesMap.add(typeName, tof.isAllAnnotatorFeatures(), capability.getLanguagesSupported(), false);
+        }
       }
     }
-    setNeedsCompilation();
+    setCompileNeeded();
   }
 
   /**
    * @see org.apache.uima.analysis_engine.ResultSpecification#removeTypeOrFeature(org.apache.uima.analysis_engine.TypeOrFeature)
+   * This removes the type or feature for all languages.
+   * Beware: there can be two ToFs for a type, one with allFeatures set and one without (if they have different languages).
    */
-  public void removeTypeOrFeature(TypeOrFeature aTypeOrFeature) {
-    // remove Type or Feature from the
-    name2tof_langs.remove(aTypeOrFeature.getName());
-    setNeedsCompilation();  // may have removed something which had subtypes
+  public void removeTypeOrFeature(TypeOrFeature tof) {
+    String name = tof.getName();
+    if (tof.isType()) {
+      rsTypesMap.remove(name);
+    } else {
+      int i = name.indexOf(TypeSystem.FEATURE_SEPARATOR);
+      rsTypesMap.remove(name.substring(0, i), name.substring(i+1));
+    }
+    setCompileNeeded();
   }
 
   /**
@@ -842,11 +537,8 @@ public final class ResultSpecification_i
   }
 
   public void setTypeSystem(TypeSystem ts) {
-    if (mTypeSystem == ts) {
-      return;
-    }
     mTypeSystem = ts;
-    setNeedsCompilation();
+    setCompileNeeded();
   }
   
   public TypeSystem getTypeSystem() {
@@ -856,13 +548,119 @@ public final class ResultSpecification_i
   public String toString() {
     StringBuilder sb = new StringBuilder();
     sb.append("org.apache.uima.analysis_engine.impl.ResultSpecification_impl:\n);");
-    sb.append("needsCompilation = ").append(needsCompilation).append("\n");
-    sb.append("lang2int = ").append(lang2int).append("\n");
-    sb.append("name2tof_langs = ").append(name2tof_langs).append("\n");
-    sb.append("withSubtypesName2tof_langs = ").append(withSubtypesName2tof_langs).append("\n");
+    sb.append("  needsCompilation = ").append(needsCompilation).append("\n");
+//    sb.append("lang2int = ").append(lang2int).append("\n");
+//    sb.append("name2tof_langs = ").append(name2tof_langs).append("\n");
+//    sb.append("withSubtypesName2tof_langs = ").append(withSubtypesName2tof_langs).append("\n");
+    sb.append("rsTofLangs = ").append(rsTypesMap);
     sb.append("mTypeSystem = ").append(mTypeSystem).append("\n");
     return sb.toString();
   }
+
+  private void compileIfNeeded() {
+    if (needsCompilation) {
+      needsCompilation = false;
+      compile();
+    }
+  }
+  
+  private void setCompileNeeded() {
+    needsCompilation = true;
+    rsCompiled.clear(); 
+  }
+  
+  /**
+   * create a fully expanded version of this result spec
+   */
+  
+  private void compile() {
+    for (RsType rst : rsTypesMap) {
+      if (rst.isSpecified) {
+        addCompiledFormForTypeAndItsSubtypes(rst, rst.languagesNotAllFeat);
+      }
+      if (rst.isAllFeatures) {
+        addCompiledFormForTypeAndItsSubtypes(rst, rst.languagesAllFeat);
+        
+        for (Feature f : rst.getAllAppropriateFeatures(mTypeSystem)) {
+          addCompiledFormForFeatureAndItsSubtypes(rst, f.getShortName(), rst.languagesAllFeat);          
+        }
+      }
+      if (rst.features != null) {
+        for (RsFeat rsf : rst.features) {
+          addCompiledFormForFeatureAndItsSubtypes(rst, rsf.shortFeatName, rsf.languages);
+        }
+      }   
+    }
+  }
+  
+  private void addCompiledFormForTypeAndItsSubtypes(RsType rst, RsLangs langs) {
+    addCompiledFormEntry(rst.typeName, langs);
+    for (String subtypeName : subtypeNames(rst.typeName)) {
+      addCompiledFormEntry(subtypeName, langs);
+    }
+  }
+  
+  /**
+   * Note: the string typeXXX:featYYY may not be in the type system.
+   *   For instance, if featYYY is introduced in type Foo, we could have a spec of
+   *     FooSubtype:featYYY; this string could be unique to the result spec
+   * @param rst
+   * @param shortFeatName
+   * @param langs
+   */
+  private void addCompiledFormForFeatureAndItsSubtypes(RsType rst, String shortFeatName, RsLangs langs) {
+    addCompiledFormEntry(RsFullFeatNames.getFullFeatName(rst.typeName, shortFeatName), langs);
+    for (String subtypeName : subtypeNames(rst.typeName)) {
+      addCompiledFormEntry(RsFullFeatNames.getFullFeatName(subtypeName, shortFeatName), langs);  
+    }
+  }
+      
+  /**
+   * Adds languages to a type or feature
+   * @param tofName
+   * @param languagesToAdd
+   */
+  private void addCompiledFormEntry(String tofName, RsLangs languagesToAdd) {
+    if (languagesToAdd == null) {
+      languagesToAdd = compiledXunspecified;
+    }
+    RsLangs rsLangs = rsCompiled.get(tofName);
+    if (null == rsLangs) {
+      if (languagesToAdd != compiledXunspecified) {
+        languagesToAdd.setShared();
+      }
+      rsCompiled.put(tofName, languagesToAdd);
+      return;
+    }
+    RsLangs.addAll(rsLangs, languagesToAdd);
+  }
+  
+  private Iterable<String> subtypeNames(final String typeName) {
+    final TypeSystemImpl ts = (TypeSystemImpl) mTypeSystem;
+    return new Iterable<String>() {
+
+      public Iterator<String> iterator() {
+        return new Iterator<String>() {
+          Type t = (null == ts) ? null : ts.getType(typeName);         
+          List<Type> subtypes = (null == ts) ? EMPTY_TYPE_LIST 
+                              : (null == t ) ? EMPTY_TYPE_LIST
+                              : ts.getProperlySubsumedTypes(t);
+          int  i = 0;
+
+          public boolean hasNext() {
+            return i < subtypes.size();
+          }
+
+          public String next() {
+            return subtypes.get(i++).getName();
+          }
+
+          public void remove() {throw new UnsupportedOperationException();}
+          
+        };
+      }
+    };
+  }
   
   /**
    * Compute the feature/type + language intersection of two result specs
@@ -873,182 +671,69 @@ public final class ResultSpecification_i
    *   Each is a set of languages, interpreted as a "Union".
    *     If the set contains x-unspecified - it is taken to mean all languages
    *     if the set contains XX - it is taken to mean the union of all sublanguages XX-yy
-   *     
-   * package scope
    */
 
-  static ResultSpecification_impl intersect(ResultSpecification rs1in, ResultSpecification_impl rs2in) {
-    ResultSpecification_impl rs1 = (ResultSpecification_impl) rs1in;
-    ResultSpecification_impl rs2 = (ResultSpecification_impl) rs2in;
-    ResultSpecification_impl newRs = new ResultSpecification_impl(rs1.getTypeSystem());
+  
+  ResultSpecification_impl intersect(ResultSpecification_impl rsOther) {
     
-    rs1.compileIfNeeded();  // compile to make the next tests for type intersecting work
-    rs2.compileIfNeeded();
+    ResultSpecification_impl r = new ResultSpecification_impl();
+    r.setTypeSystem(rsOther.mTypeSystem);
     
-    // iterate over all types and features in this component's result set
-    for (Map.Entry<String, ToF_Languages> item : rs2.availName2tof_langs().entrySet()) {
-      String rs2tof = item.getKey();
-      ToF_Languages rs2Langs = item.getValue();
-      // see if in other resultSpec
-      ToF_Languages rs1Langs = rs1.getLanguagesForTypeOrFeature(rs2tof);
-      if (rs1Langs == null) {
-        continue;
-      }
-
-      // Type or Feature is in both; intersect the languages
-      // if either has language x-unspecified, use the other's language spec.
-      if (rs1Langs.languages.get(ResultSpecification_impl.UNSPECIFIED_LANGUAGE_INDEX)) {
-        newRs.addClonedToF_Languages(rs2Langs, rs2);
-        continue;
-      }
-      if (rs2Langs.languages.get(ResultSpecification_impl.UNSPECIFIED_LANGUAGE_INDEX)) {
-        newRs.addClonedToF_Languages(rs1Langs, rs1);
-        continue;
+    r.compileIfNeeded();
+    rsOther.compileIfNeeded();
+    compileIfNeeded();
+    
+    /**
+     * Iterate over other 
+     */
+    for (Iterator<Entry<String, RsLangs>> it = rsOther.rsCompiled.entrySet().iterator(); it.hasNext();) {
+      Entry<String, RsLangs> e = it.next();
+      String tofName = e.getKey();
+      RsLangs otherRsLangs = e.getValue(); 
+      
+      /**
+       * Get corresponding languages from this side
+       */
+      RsLangs thisRsLangs = rsCompiled.get(tofName);
+      if (null == thisRsLangs) {
+        continue;    // null does NOT mean x-unspecified, it means tof is not present in compiled map at all
       }
-
-      // Intersect languages - neither has x-unspecified
-
-      List<String> rsltLangs = computeResultLangIntersection(rs1, rs1Langs, rs2, rs2Langs);
- 
-      if (rsltLangs.size() > 0) {
-        newRs.addResultTypeOrFeature(rs2Langs.tof, rsltLangs.toArray(new String[rsltLangs.size()]));
+      
+      /**
+       * Intersect languages, with subsumption
+       */
+      RsLangs intersectRsLangs = thisRsLangs.intersect(otherRsLangs);      
+      if (intersectRsLangs != null) {
+        r.addCompiledFormEntry(tofName, intersectRsLangs);
       }
     }
-    return newRs;
+    return r;
   }
   
-  private static List<String> computeResultLangIntersection(
-      ResultSpecification_impl rs1, ToF_Languages rs1Langs,     
-      ResultSpecification_impl rs2, ToF_Languages rs2Langs) {
-
-    BitSet rs1bs = rs1Langs.languages;
-    BitSet rs2bs = rs2Langs.languages;
-    List<String> rsltLangs = new ArrayList<String>();
-
-    // because we don't have a list of languages as "Strings",
-    // iterate over all the languages, and skip those not in this
-    // type-or-feature
-    for (Map.Entry<String, Integer> langIndex2 : rs2.lang2int.entrySet()) {
-      if (!rs2bs.get(langIndex2.getValue())) {
-        continue;
-      }
-
-      // String intersectLang = intersectLanguages(langIndex.getKey(),
-      // rs1Langs, rs2Langs);
-
-      String thisLang = langIndex2.getKey();
-      if (rs1bs.get(rs1.getLanguageIndex(thisLang))) {
-        rsltLangs.add(thisLang);
-        continue;
-      }
-
-      // thisLang is not in the set of rs1 languages, but it might still be
-      // in the intersection, if thisLang is not a base form, and the base
-      // form
-      // *is* in the set of rs1 languages
-      String baseLang = getBaseLanguage(thisLang);
-      if (baseLang != thisLang) { // thisLang is not a base form
-        if (rs1bs.get(rs1.getLanguageIndex(baseLang))) {
-          rsltLangs.add(thisLang);
-          continue;
-        }
-      }
-    }
-    
-    // add in more specific langs in rs1 matching general lang in rs2
-   
-    // because we don't have a list of languages as "Strings",
-    // iterate over all the languages, and skip those not in this
-    // type-or-feature
-    for (Map.Entry<String, Integer> langIndex1 : rs1.lang2int.entrySet()) {
-      if (!rs1bs.get(langIndex1.getValue())) {
-        continue;
-      }
-
-      String rsLang1 = langIndex1.getKey();
-      if (rs2bs.get(rs2.getLanguageIndex(rsLang1))) {
-        continue;  // skip this if already would be in intersection
-      }
-      String baseLang1 = getBaseLanguage(rsLang1);
-      if (rsLang1 != baseLang1) {  // rsLang1 is not a base form
-        if (rs2bs.get(rs2.getLanguageIndex(baseLang1))) {
-          rsltLangs.add(rsLang1);  // add specific lang to intersection
-        }
-      }
-    }
-    return rsltLangs;
+  
+  
+  private boolean compiledFormEquals(ResultSpecification_impl other) {
+    compileIfNeeded();
+    other.compileIfNeeded();
+    return rsCompiled.equals(other.rsCompiled);  // compares two maps, returns true if have same entries
   }
 
   @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
-    if (!super.equals(obj)) {
-      return false;
-    }
-    if (getClass() != obj.getClass()) {
+  public boolean equals(Object aObj) {
+    if (!(aObj instanceof ResultSpecification_impl)) {
       return false;
     }
-    ResultSpecification_impl other = (ResultSpecification_impl) obj;
-    if (lang2int == null) {
-      if (other.lang2int != null) {
-        return false;
-      }
-    }
-    if (mTypeSystem == null) {
-      if (other.mTypeSystem != null) {
-        return false;
-      }
-    } else if (mTypeSystem != other.mTypeSystem) {
-      return false;
-    }
-    if (name2tof_langs == null) {
-      if (other.name2tof_langs != null) {
-        return false;
-      }
-    } 
-    this.compileIfNeeded();
-    other.compileIfNeeded();
-    
-    if (withSubtypesName2tof_langs == null) {
-      if (other.withSubtypesName2tof_langs != null) {
-        return false;
-      }
-    }
-    
-    if (availName2tof_langs().size() != other.availName2tof_langs().size()) {
-      return false;
-    }
-    
-    // iterate over all types and features in this 
-    for (Map.Entry<String, ToF_Languages> item : availName2tof_langs().entrySet()) {
-      String tof = item.getKey();
-      ToF_Languages toflangs = item.getValue();
-      ToF_Languages otherToflangs = other.availName2tof_langs().get(tof);
-      BitSet thisBs = toflangs.languages;
-      BitSet otherBs = otherToflangs.languages;
-      if (thisBs.cardinality() != otherBs.cardinality()) {
-        return false;
-      }
-      for (Map.Entry<String, Integer>l2ie : lang2int.entrySet()) {
-        if (thisBs.get(l2ie.getValue())) {
-          if (!otherBs.get(other.lang2int.get(l2ie.getKey()))) {
-            return false;
-          }
-        }
-      }
-    }
-    
-    return true;
+    return compiledFormEquals((ResultSpecification_impl)aObj);
   }
   
-  /**
-   * Hash code not implemented
-   * @return
-   */
-  @Override
-  public int hashCode() {
-    throw new UnsupportedOperationException();
+  static boolean equalsOrBothNull(Object x, Object y) {
+    if (null == x && null == y) {
+      return true;
+    }
+    if (null != x && x.equals(y)) {
+      return true;
+    }
+    return false;
   }
+
 }

Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeat.java Thu Sep  2 13:43:50 2010
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+/**
+ * Represents the languages associated with one explicit type:feature in a result spec
+ */
+public class RsFeat {
+  final String shortFeatName;    // these are canonical strings, can be compared ==
+  RsLangs languages = null;
+  
+  RsFeat(String shortName, String[] languages) {
+    this.shortFeatName = shortName;
+    this.languages = RsLangs.createOrNull(languages);
+  }
+  
+  RsFeat(String shortName, RsLangs languages) {
+    this.shortFeatName = shortName;
+    if (null != languages) {
+      languages.setShared();
+    }
+    this.languages = languages;
+  }
+  
+  RsFeat(RsFeat original) {
+    shortFeatName = original.shortFeatName;
+    if (null != original.languages) {
+      original.languages.setShared();
+    }  
+    languages = original.languages; 
+  }
+    
+  boolean subsumes(String language) {
+    return RsLangs.subsumes(languages, language);
+  }
+     
+}

Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFeats.java Thu Sep  2 13:43:50 2010
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Represents the updatable list of features, each with its own language spec.
+ * A given feature appears only once in the list, holding the union of all its languages.
+ */
+public class RsFeats implements Iterable<RsFeat> {
+  /** Backing list; stays null until the first feature is added. */
+  List<RsFeat> features = null;
+
+  RsFeats() {}
+
+  /**
+   * Copies into a new feature list; the copied entries share their language
+   * sets with the originals (see the RsFeat copy constructor).
+   * @param other the feature list to copy
+   */
+  RsFeats(RsFeats other) {
+    if (other.features == null) {
+      return;
+    }
+    features = new ArrayList<RsFeat>(other.features.size());
+    for (RsFeat f : other.features) {
+      features.add(new RsFeat(f));
+    }
+  }
+
+  /**
+   * @return the number of features held, 0 when nothing has been added yet
+   */
+  int size() {
+    return (features == null) ? 0 : features.size();
+  }
+
+  /**
+   * Appends a feature. ASSUMES the feature is not already in the list.
+   * @param shortFeatName short name of the feature to add
+   * @param languages either a String[] of language names or an RsLangs
+   *                  instance; null is accepted and stored as-is
+   */
+  void add(String shortFeatName, Object languages) {
+    final RsFeat feat;
+    if (languages instanceof String[]) {
+      feat = new RsFeat(shortFeatName, (String[]) languages);
+    } else {
+      // A null argument lands here. The RsFeat constructor null-checks before
+      // marking the languages shared, so no setShared() call is made here.
+      feat = new RsFeat(shortFeatName, (RsLangs) languages);
+    }
+    if (null == features) {
+      features = new ArrayList<RsFeat>(1);
+    }
+    features.add(feat);
+  }
+
+  /**
+   * Removes the first entry with the given short name, regardless of its
+   * language(s). Does nothing when the list is empty or the name is absent.
+   * @param typeName not used by this implementation
+   * @param shortFeatName short name of the feature to remove
+   */
+  void remove(String typeName, String shortFeatName) {
+    if (null == features) {
+      return;
+    }
+    for (Iterator<RsFeat> it = features.iterator(); it.hasNext();) {
+      if (shortFeatName.equals(it.next().shortFeatName)) {
+        it.remove();
+        return;
+      }
+    }
+  }
+
+  /**
+   * @param typeName not used by this implementation
+   * @param shortFeatName short name of the feature to look for
+   * @return true if a feature with that short name is in the list
+   */
+  boolean contains(String typeName, String shortFeatName) {
+    return null != get(shortFeatName);
+  }
+
+  /**
+   * Linear search in the list for a short feature name.
+   * @param shortFeatName short feature name to look up
+   * @return the matching entry, or null if there is none (including when
+   *         nothing has been added yet)
+   */
+  RsFeat get(String shortFeatName) {
+    if (null == features) {
+      return null;
+    }
+    for (RsFeat r : features) {
+      if (r.shortFeatName.equals(shortFeatName)) {
+        return r;
+      }
+    }
+    return null;
+  }
+
+  public Iterator<RsFeat> iterator() {
+    return (null == features) ? nullIterator : features.iterator();
+  }
+
+  /**
+   * Stateless iterator over nothing, handed out while the backing list is null.
+   * Follows the java.util.Iterator contract for an exhausted iterator.
+   */
+  final static Iterator<RsFeat> nullIterator = new Iterator<RsFeat>() {
+
+    public boolean hasNext() {
+      return false;
+    }
+
+    public RsFeat next() {
+      throw new java.util.NoSuchElementException();
+    }
+
+    public void remove() {
+      // next() can never have succeeded, so there is nothing to remove
+      throw new IllegalStateException();
+    }
+  };
+}

Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsFullFeatNames.java Thu Sep  2 13:43:50 2010
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import org.apache.uima.cas.TypeSystem;
+
+/**
+ * Implements a globally shared weak-reference map between
+ *   types & features to the corresponding Full Feature name
+ * Used to avoid creating new full feature names when compiling
+ *   result feature specs.
+ * Indexable for features via a 2 step index: typeName (weak) and shortFeatName
+ *
+ */
+public class RsFullFeatNames {
+
+  /** Per-type holder of the short-name to full-name map; the map is created on first use. */
+  private static class TypeFeats {
+    private Map<String, String> short2Full = null;
+  }
+
+  /** First-level index, keyed by type name; also serves as the lock for all lookups. */
+  private static final Map<String, TypeFeats> typeName2TypeFeats = new WeakHashMap<String, TypeFeats>();
+
+  /**
+   * Returns the full feature name for a type name / short feature name pair,
+   * building and caching it on the first request.
+   * @param typeName the type name
+   * @param shortFeatName the short feature name
+   * @return the cached or newly built full feature name
+   */
+  public static String getFullFeatName(String typeName, String shortFeatName) {
+    synchronized (typeName2TypeFeats) {
+      TypeFeats entry = typeName2TypeFeats.get(typeName);
+      if (entry == null) {
+        entry = new TypeFeats();
+        typeName2TypeFeats.put(typeName, entry);
+      }
+      Map<String, String> names = entry.short2Full;
+      if (names == null) {
+        names = new HashMap<String, String>(3);
+        entry.short2Full = names;
+      }
+      String full = names.get(shortFeatName);
+      if (full == null) {
+        full = makeFullFeatName(typeName, shortFeatName);
+        names.put(shortFeatName, full);
+      }
+      return full;
+    }
+  }
+
+  /** Joins the type name, TypeSystem.FEATURE_SEPARATOR and the short feature name. */
+  private static String makeFullFeatName(String typeName, String shortFeatName) {
+    final int capacity = typeName.length() + 1 + shortFeatName.length();
+    final StringBuilder joined = new StringBuilder(capacity);
+    joined.append(typeName);
+    joined.append(TypeSystem.FEATURE_SEPARATOR);
+    joined.append(shortFeatName);
+    return joined.toString();
+  }
+
+}

Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLang.java Thu Sep  2 13:43:50 2010
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.uima.cas.text.Language;
+
+/**
+ * Class used to canonicalize language string
+ */
+public class RsLang {
+
+  /**
+   * Global table of canonical language strings (each entry maps to itself);
+   * also used as the lock guarding lookups and insertions.
+   */
+  private static final Map<String, String> canonicalLanguageStrings = new HashMap<String, String>();
+
+  /**
+   * Maps a language string to its single canonical instance.
+   * @param language the language string, may be null
+   * @return Language.UNSPECIFIED_LANGUAGE if the argument is null or equal to
+   *         it, otherwise the canonical string equal to the argument
+   */
+  static String getCanonicalLanguageString(String language) {
+    if (language == null || language.equals(Language.UNSPECIFIED_LANGUAGE)) {
+      return Language.UNSPECIFIED_LANGUAGE;
+    }
+    synchronized (canonicalLanguageStrings) {
+      String canonical = canonicalLanguageStrings.get(language);
+      if (canonical != null) {
+        return canonical;
+      }
+      // first sighting: register a fresh copy rather than the caller's instance
+      canonical = new String(language);
+      canonicalLanguageStrings.put(canonical, canonical);
+      return canonical;
+    }
+  }
+}

Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsLangs.java Thu Sep  2 13:43:50 2010
@@ -0,0 +1,366 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.uima.cas.text.Language;
+
+/**
+ * A set of languages, each represented by a canonical string object
+ * The set is stored without any subsumed elements
+ * 
+ * Instances of this class are shareable
+ * Duplicate-on-update strategy
+ *   Requires that all update operations to it return the
+ *     possibly new RsLangs object, and that calls are always of the form
+ *       rsLangInstance = rsLangInstance.<some-update-operation>
+ *   Requires that all copy operations set the shared bit:
+ *     copiedInstance = origInstance.setShared(); 
+ *  
+ * An instance marked isShared == true is immutable
+ *   Updates cause duplication.
+ *   
+ * Users store x-unspecified as null for the rsLangs instance
+ *   Because of this, users use static methods, passing in as the first argument,
+ *   the value of rsLangs, and getting an updated value of rsLangs.
+ *     This allows the passed-in value to be null.
+ *  
+ * Languages kept in canonical form:
+ *   duplicates removed
+ *   subsumed languages removed
+ *   language strings mapped to unique strings (allowing == comparisons)
+ * Languages kept in array list, to allow for expansion
+ *   Languages not removed, only added (for a given tof)
+ */
+public class RsLangs {
+
+  /** Canonical language strings; null means x-unspecified, an empty list means "no languages". */
+  private ArrayList<String> languages;
+  /** Supports copy-on-update: a shared instance is never modified in place. */
+  private boolean isShared = false;
+
+  private RsLangs() {}
+
+  /** Copy constructor used by copy-on-update; the copy owns its list and starts unshared. */
+  private RsLangs(RsLangs original) {
+    languages = (null == original.languages) ? null : new ArrayList<String>(original.languages);
+  }
+
+  /**
+   * Creates an instance representing x-unspecified for places (compiled forms)
+   * where null cannot be used.
+   * @return a shared instance whose language list is null
+   */
+  static RsLangs createSharableEmpty() {
+    RsLangs rsl = new RsLangs();
+    rsl.setShared();
+    return rsl;
+  }
+
+  /**
+   * @param languages language strings, null means x-unspecified
+   * @return null (x-unspecified) or a new instance holding the languages
+   */
+  static RsLangs createOrNull(String[] languages) {
+    return replaceAll(null, languages);
+  }
+
+  /** Marks this instance as shared, making subsequent updates work on a copy. */
+  void setShared() {
+    isShared = true;
+  }
+
+  /**
+   * @param rsl instance to share, may be null
+   * @return null if rsl is null or represents x-unspecified, otherwise rsl
+   *         itself, now marked shared
+   */
+  static RsLangs createOrNull(RsLangs rsl) {
+    if (null == rsl || rsl.languages == null) {
+      return null;
+    }
+    rsl.setShared();
+    return rsl;
+  }
+
+  /**
+   * @param rsl may be null
+   * @return true if rsl is null, has a null list, or has an empty list
+   */
+  static boolean isEmpty(RsLangs rsl) {
+    return rsl == null || rsl.languages == null || rsl.languages.size() == 0;
+  }
+
+  /**
+   * @param rsl may be null (means x-unspecified, subsumes all)
+   * @param lang language to test; canonicalized before the test
+   * @return true if any language in rsl subsumes lang
+   */
+  static boolean subsumes(RsLangs rsl, String lang) {
+    return subsumesCanonical(rsl, RsLang.getCanonicalLanguageString(lang));
+  }
+
+  /**
+   * @param rsl may be null (means x-unspecified, subsumes all)
+   * @param lang canonical language string, may be null
+   * @return true if any language in rsl subsumes lang
+   */
+  static boolean subsumesCanonical(RsLangs rsl, String lang) {
+    // don't test for size() == 0 - that's used by replace to indicate empty, not x-unspec
+    if (null == rsl || null == rsl.languages) {
+      return true;  // x-unspecified subsumes all
+    }
+    if (null == lang || lang == Language.UNSPECIFIED_LANGUAGE) {
+      return false;  // x-unspec not subsumed by anything (other than x-unspec)
+    }
+    return subsumesCanonical(rsl, lang, getBaseLanguage(lang));
+  }
+
+  /**
+   * @param rsl assumed to be not null, not x-unspec
+   * @param lang assumed to be not null, not x-unspec, canonical
+   * @param baseLang canonical base form of lang
+   * @return true if any language in rsl subsumes lang
+   */
+  private static boolean subsumesCanonical(RsLangs rsl, String lang, String baseLang) {
+    for (String rsLang : rsl.languages) {
+      if (subsumesCanonical(rsLang, lang, baseLang)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Identity comparison is intended: all arguments are canonical strings.
+   * @return true if containingLang is langToTest or its base language
+   */
+  private static boolean subsumesCanonical(String containingLang, String langToTest, String langToTestBase) {
+    return containingLang == langToTest || containingLang == langToTestBase;
+  }
+
+  /**
+   * @param language (must not be null)
+   * @return the same == language when it has no separator, otherwise the
+   *         canonical string for the part before the first separator
+   */
+  private static String getBaseLanguage(String language) {
+    int index = language.indexOf(Language.CANONICAL_LANG_SEPARATOR);
+    if (index > -1) {
+      return RsLang.getCanonicalLanguageString(language.substring(0, index));
+    }
+    return language;
+  }
+
+  /**
+   * @param rsl could be null meaning current is x-unspecified
+   * @param langs null means x-unspecified
+   * @return null (meaning x-unspecified), or an instance of RsLangs
+   */
+  static RsLangs replaceAll(RsLangs rsl, String[] langs) {
+    // NOTE(review): when rsl already holds languages, this only adds langs to
+    // them (and ignores a null langs) rather than replacing - confirm against
+    // callers whether that is intended before changing it.
+    if (rsl == null || rsl.languages == null) {
+      if (langs == null) {
+        return null;
+      }
+      if (rsl == null || rsl.isShared) {
+        rsl = new RsLangs();
+      }
+      rsl.languages = new ArrayList<String>(1);  // special form means empty, not x-unspec
+    }
+    return addAll(rsl, langs);
+  }
+
+  /**
+   * @param rsl may be null (x-unspecified)
+   * @param langs languages to add, null adds nothing
+   * @return the possibly new instance, or null once x-unspecified is reached
+   */
+  static RsLangs addAll(RsLangs rsl, String[] langs) {
+    if (null == langs ||
+        null == rsl || null == rsl.languages) {  // because x-unspec subsumes all
+      return rsl;
+    }
+    for (String lang : langs) {
+      rsl = add(rsl, lang);
+      if (null == rsl) {
+        return null;  // became x-unspecified; remaining languages are subsumed
+      }
+    }
+    return rsl;
+  }
+
+  /**
+   * @param rsl may be null (x-unspecified)
+   * @param rsLangs languages to add, null or x-unspecified adds nothing
+   * @return the possibly new instance
+   */
+  static RsLangs addAll(RsLangs rsl, RsLangs rsLangs) {
+    if (null == rsLangs || null == rsLangs.languages ||
+        null == rsl || null == rsl.languages) {  // because x-unspec subsumes all
+      return rsl;
+    }
+    for (String lang : rsLangs.languages) {
+      rsl = add(rsl, lang);
+    }
+    return rsl;
+  }
+
+  /**
+   * Adds a language unless it is subsumed by an existing one, and removes any
+   * languages the newly added one subsumes. x-unspec is stored as null.
+   * @param rsl the set to add to; null or a null language list is treated as
+   *            x-unspecified and returned unchanged
+   * @param lang may be null or x-unspec
+   * @return the updated set (a copy if rsl was shared), or null when lang is
+   *         x-unspecified
+   */
+  static RsLangs add(RsLangs rsl, String lang) {
+    lang = RsLang.getCanonicalLanguageString(lang);
+    if (lang == Language.UNSPECIFIED_LANGUAGE) {
+      return null;
+    }
+    if (null == rsl || null == rsl.languages) {
+      return rsl;  // x-unspecified already subsumes lang
+    }
+    if (!subsumesCanonical(rsl, lang, getBaseLanguage(lang))) {
+      if (rsl.isShared) {
+        rsl = new RsLangs(rsl);
+      }
+      rsl.removeSubsumedLanguages(lang);  // remove subsumed lang, but leave as empty list if all removed
+      rsl.languages.add(lang);
+    }
+    return rsl;
+  }
+
+  /**
+   * Removes the stored languages that are subsumed by the argument.
+   * If all are removed, the list is kept as an empty list.
+   * @param canonicalLang canonical language about to be added
+   */
+  private void removeSubsumedLanguages(String canonicalLang) {
+    for (Iterator<String> it = languages.iterator(); it.hasNext();) {
+      String existing = it.next();
+      // the new language is the containing one here, the stored one is tested
+      if (subsumesCanonical(canonicalLang, existing, getBaseLanguage(existing))) {
+        it.remove();
+      }
+    }
+  }
+
+  /**
+   * @param other the set to intersect with, may be null
+   * @return null for empty intersection (null doesn't mean x-unspecified here)
+   */
+  RsLangs intersect(RsLangs other) {
+    if (null == other) {
+      return null;
+    }
+
+    if (null == this.languages) { // means x-unspecified, so return the other
+      other.setShared();          // result aliases other, so it must be copy-on-update
+      return other;
+    }
+    if (null == other.languages) { // means x-unspecified, so return the first
+      this.setShared();            // result aliases this, so it must be copy-on-update
+      return this;
+    }
+
+    RsLangs r = new RsLangs();
+    r.languages = new ArrayList<String>(1);  // creates an empty, not null arraylist
+
+    for (String lang : this.languages) {
+      if (subsumesCanonical(other, lang)) {    // langs of this that other subsumes
+        r = add(r, lang);
+      }
+    }
+    for (String lang : other.languages) {
+      if (subsumesCanonical(this, lang)) {     // langs of other that this subsumes
+        r = add(r, lang);
+      }
+    }
+    if (r.languages.size() == 0) {
+      return null;
+    }
+    return r;
+  }
+
+  /**
+   * @param rsl may be null
+   * @return the languages as an array, or null when isEmpty(rsl)
+   */
+  static String[] toArray(RsLangs rsl) {
+    return (isEmpty(rsl)) ? null : rsl.languages.toArray(new String[rsl.languages.size()]);
+  }
+
+  /**
+   * Must return the same hashcode regardless of the value of isShared, and
+   * treating the values as a set
+   */
+  @Override
+  public int hashCode() {
+    int result = 31;
+    if (languages != null) {
+      for (String lang : languages) {
+        result += lang.hashCode();  // non-standard, gives same answer regardless of order
+      }
+    }
+    return result;
+  }
+
+  /**
+   * This must return true ignoring the value of isShared, and
+   * treating the lists as a set
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    RsLangs other = (RsLangs) obj;
+    if (languages == null || other.languages == null) {
+      return languages == other.languages;  // equal only if both are null
+    }
+    return languages.size() == other.languages.size()
+        && other.languages.containsAll(languages);
+  }
+
+}

Added: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java?rev=991931&view=auto
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java (added)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/analysis_engine/impl/RsType.java Thu Sep  2 13:43:50 2010
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.analysis_engine.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+
+import static org.apache.uima.analysis_engine.impl.ResultSpecification_impl.equalsOrBothNull;
+
+/**
+ * Holds types and/or features with language specs
+ * 
+ * These are expected to be sparse with respect to the complete type system
+ * 
+ */
+
+public class RsType {
+    
+  public final static List<Feature> EMPTY_FEATURE_LIST = new ArrayList<Feature>(0);
+  
+  final String typeName;
+  boolean isAllFeatures = false;
+  boolean isSpecified = false;  // true if type is specified by itself, without a feature
+  RsLangs languagesAllFeat = null;     // languages for this type w/ allFeat   null means x-unspec
+  RsLangs languagesNotAllFeat = null;  // languages for this type w/o allFeat  null means x-unspec
+  RsFeats features = null; 
+  
+  RsType(String name) {
+    typeName = name;
+  }
+  
+  RsType(RsType original) {
+    typeName = original.typeName;
+    isAllFeatures = original.isAllFeatures;
+    isSpecified = original.isSpecified;
+    languagesAllFeat = RsLangs.createOrNull(original.languagesAllFeat);
+    languagesNotAllFeat = RsLangs.createOrNull(original.languagesNotAllFeat);
+    features = (original.features == null) ? null : new RsFeats(original.features);
+  }
+    
+  /**
+   * 
+   * @param shortFeatName
+   * @param lang
+   * @return true if lang subsumed by langs of the feature 
+   *                 or of the type with all-feats specified
+   */
+  boolean subsumesLanguageInFeat(String shortFeatName, String lang) {
+    if (isAllFeatures && RsLangs.subsumes(languagesAllFeat, lang)) {
+      return true;
+    }
+    RsFeat f = getFeat(shortFeatName);
+    if (null == f) {
+      return false;
+    }
+    return RsLangs.subsumes(f.languages, lang);
+  }
+  
+  RsFeat getFeat(String shortFeatName) {
+    if (null == features) {
+      return null;
+    }
+    return features.get(shortFeatName);
+  }
+   
+  /**
+   * returns the Features for a type in a result spec 
+   * @param ts The type system, may be null
+   * @return
+   */
+  List<Feature> getAllAppropriateFeatures(final TypeSystem ts) {
+    if (null == ts) {
+      return EMPTY_FEATURE_LIST;
+    }
+    Type t = ts.getType(typeName);
+    return (null == t) ? EMPTY_FEATURE_LIST : t.getFeatures();
+  }
+  
+  boolean hasAllFeaturesExplicitly(TypeSystem ts) {
+//    if (features == null || features.features == null || features.features.size() == 0 || ts == null) {
+//      return false;
+//    }
+    List<Feature> all = getAllAppropriateFeatures(ts);
+    if (all.size() == 0) {
+      if (features == null || features.features == null || features.features.size() == 0 || ts == null) {
+        return true;
+      }
+      return false;
+    }
+    int fz = (features == null || features.features == null) ? 0 : features.features.size();
+    if (fz == all.size()) {
+      for (Feature f : all) {
+        if (!features.contains(typeName, f.getShortName())) {
+          return false;
+        }
+      }
+      return true;
+    }
+    return false;
+  }
+  
+  boolean allFeaturesHaveSameLangs() {
+    if (features == null) {
+      return false;
+    }
+    int fz = features.size();
+    if (fz == 0) { 
+      return false;
+    }
+    if (fz == 1) {
+      return true;
+    }
+    List<RsFeat> rsf = features.features;
+    RsLangs l = rsf.get(0).languages;
+    
+    for (int i = 1; i < fz; i++) {
+      RsLangs fl = rsf.get(i).languages;
+      if (!equalsOrBothNull(l, fl)) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
+



Mime
View raw message