ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1624062 [2/4] - in /ctakes/sandbox/dictionarytool: data/default/ src/org/apache/ctakes/dictionarytool/ src/org/apache/ctakes/dictionarytool/reader/ src/org/apache/ctakes/dictionarytool/util/ src/org/apache/ctakes/dictionarytool/util/collec...
Date Wed, 10 Sep 2014 17:30:43 GMT
Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DeliveryUtil.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DeliveryUtil.java?rev=1624062&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DeliveryUtil.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DeliveryUtil.java Wed Sep 10 17:30:42 2014
@@ -0,0 +1,1893 @@
+package org.apache.ctakes.dictionarytool.util;
+
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+
+// NOTE: a lot of this information is documented at http://www.nlm.nih.gov/research/umls/rxnorm/overview.html
+// For instance, the MRCONSO column 13 value "BN" is "Brand Name", which we want, as opposed to "SBDC" (Semantic Branded Drug Component), e.g. Fluoxetine 4 mg/ml [Prozac]
+// TODO - modify to use this
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 7/14/14
+ * <p/>
+ * Used (in part) http://www.everypatientsadvocate.com/columns/prescriptionabbreviations.pdf
+ * <p/>
+ * Used Doc by Melissa Tharp (via Stephen Wu): http://bit.ly/WkqCPa
+ * <p/>
+ * <p/>
+ * see http://www.resourcepharm.com/pre-reg-pharmacist/pharmacy-abbreviations.html
+ * <p/>
+ * see http://www.polaris.edu/wp-content/uploads/2010/08/Adult-Pharmacy-Tech-Additional-Information.pdf
+ * <p/>
+ * See http://en.wikipedia.org/wiki/Category:Pharmacology
+ * <p/>
+ * See http://en.wikipedia.org/wiki/Time_release_technology
+ */
+final public class DeliveryUtil {
+
+   private DeliveryUtil() {
+   }
+
+
+   /**
+    * Common contract for the term vocabularies declared in this class.
+    */
+   private static interface RxAttribute {
+      /**
+       * @return the text forms under which this attribute is looked for
+       */
+      String[] getTerms();
+   }
+
+   /**
+    * Marker sub-type of {@link RxAttribute}; it declares no members of its own.
+    */
+   private static interface Punctuation extends RxAttribute {
+   }
+
+   /**
+    * Marker sub-type of {@link RxAttribute}; it declares no members of its own.
+    */
+   private static interface RxUnit extends RxAttribute {
+   }
+
+
+   static private String removeBadEnding( final String text, final RxAttribute rxAttribute ) {
+      String trimmed = text.trim();
+      int lastLength = -1;
+      int length = trimmed.length();
+      while ( length != lastLength ) {
+         lastLength = length;
+         for ( String word : rxAttribute.getTerms() ) {
+            if ( trimmed.endsWith( " " + word ) ) {
+               trimmed = trimmed.substring( 0, trimmed.length() - word.length() ).trim();
+            }
+         }
+         length = trimmed.length();
+      }
+      return trimmed;
+   }
+
+   /**
+    * @return every DosePer, Conjunction, BadEndParenthesis and Dot value, gathered in one set
+    */
+   static private Collection<RxAttribute> getBadEndings() {
+      final Collection<RxAttribute> endings = new HashSet<>();
+      Collections.addAll( endings, DosePer.values() );
+      Collections.addAll( endings, Conjunction.values() );
+      // Parenthesis.values() is not added here
+      Collections.addAll( endings, BadEndParenthesis.values() );
+      Collections.addAll( endings, Dot.values() );
+      return endings;
+   }
+
+   /**
+    * Cuts a parenthetical note out of the text.  The note starts at the first occurrence of the opener
+    * and ends at the next ")".  Nothing is removed when the opener is missing, sits at index 0,
+    * or has no closing parenthesis after it.
+    *
+    * @param text   text that may contain the note
+    * @param opener literal text that begins the note, e.g. "( as "
+    * @return text before the note plus, separated by one space, any text after it
+    */
+   static private String removeParentheticalNote( final String text, final String opener ) {
+      final int openIndex = text.indexOf( opener );
+      if ( openIndex <= 0 ) {
+         return text;
+      }
+      final int closeIndex = text.indexOf( ")", openIndex + 1 );
+      if ( closeIndex <= openIndex ) {
+         return text;
+      }
+      String kept = text.substring( 0, openIndex ).trim();
+      if ( closeIndex < text.length() - 1 ) {
+         kept += " " + text.substring( closeIndex + 1 ).trim();
+      }
+      return kept;
+   }
+
+   /**
+    * Blanks out attribute text (as found by {@link #getAttributeTextSpans(String, int)}) in a tokenized copy
+    * of the given text and returns the words that remain.
+    *
+    * @param rxNorm       text to strip
+    * @param discardLevel passed through to the attribute span lookup
+    * @return the remaining words, split on whitespace
+    */
+   static private String[] getDeliveryFreeTexts( final String rxNorm, final int discardLevel ) {
+      // for things like lamictal ( for patients taking valproate )
+      String freeText = removeParentheticalNote( rxNorm, "( for patients " );
+      // for things like urea ( as urea )
+      freeText = removeParentheticalNote( freeText, "( as " );
+      // retokenize to separate characters from digits - often the dosage has unit that is not separated from amount
+      final String tokenized = TextTokenizer.getTokenizedText( freeText, true );
+      final Collection<AttributeTextSpan> textSpans = getAttributeTextSpans( tokenized, discardLevel );
+      // An attribute span that begins within the first 6 characters is only honored when the first 5 characters
+      // of the original text hold at least 4 letters or digits.  The scan is bounded by the text length so that
+      // texts shorter than 5 characters no longer throw StringIndexOutOfBoundsException.
+      final int leadLength = Math.min( 5, rxNorm.length() );
+      int leadCharCount = 0;
+      for ( int i = 0; i < leadLength; i++ ) {
+         if ( Character.isLetterOrDigit( rxNorm.charAt( i ) ) ) {
+            leadCharCount++;
+         }
+      }
+      final boolean hasLongLead = leadCharCount >= 4;
+      // find the earliest span that is not at index 0 and is not punctuation, number, ratio, dose or unit
+      int attributeBegin = Integer.MAX_VALUE;
+      for ( AttributeTextSpan textSpan : textSpans ) {
+         if ( textSpan.getBegin() != 0
+               && !textSpan.isRxAttributeClass( Punctuation.class )
+               && !textSpan.isRxAttributeClass( RxNumber.class )
+               && !textSpan.isRxAttributeClass( RxRatio.class )
+               && !textSpan.isRxAttributeClass( RxDose.class )
+               && !textSpan.isRxAttributeClass( RxUnit.class ) ) {
+            if ( textSpan.getBegin() < 6 && !hasLongLead ) {
+               continue;
+            }
+            attributeBegin = Math.min( textSpan.getBegin(), attributeBegin );
+         }
+      }
+      // blank out spans at or after that point; ratio, dose and duration spans are blanked wherever they are
+      final char[] chars = tokenized.toCharArray();
+      for ( AttributeTextSpan textSpan : textSpans ) {
+         if ( textSpan.getBegin() < attributeBegin
+               && !textSpan.isRxAttributeClass( RxRatio.class )
+               && !textSpan.isRxAttributeClass( RxDose.class )
+               && !textSpan.isRxAttributeClass( Duration.class ) ) {
+            continue;
+         }
+         for ( int i = textSpan.getBegin(); i <= textSpan.getEnd(); i++ ) {
+            chars[i] = ' ';
+         }
+      }
+      final String[] splits = new String( chars ).split( "\\s+" );
+      if ( splits.length <= 1 ) {
+         return splits;
+      }
+      final StringBuilder sb = new StringBuilder();
+      for ( String split : splits ) {
+         sb.append( split ).append( " " );
+      }
+      String trimmed = sb.toString().trim();
+      // sometimes the delivery medium "oil" is repeated when the substance is "oil" in rxnorm; "castor oil oil"
+      trimmed = trimmed.replace( " oil oil", " oil" );
+      trimmed = trimmed.replace( " green , green", " green" );
+      // for things like urea / urea
+      final int slashIndex = trimmed.indexOf( " / " );
+      if ( slashIndex > 0 && trimmed.substring( 0, slashIndex ).equals( trimmed.substring( slashIndex + 3 ) ) ) {
+         trimmed = trimmed.substring( 0, slashIndex ).trim();
+      }
+      trimmed = trimmed.replace( " ( ) ", " " );
+      // strip unwanted trailing words until a full pass over all bad endings changes nothing
+      final Collection<RxAttribute> badEndings = getBadEndings();
+      int lastLength = -1;
+      int length = trimmed.length();
+      while ( length != lastLength ) {
+         lastLength = length;
+         for ( RxAttribute badEnding : badEndings ) {
+            trimmed = removeBadEnding( trimmed, badEnding );
+         }
+         length = trimmed.length();
+      }
+      return trimmed.split( "\\s+" );
+   }
+
+   static public String getDeliveryFreeText( final String rxNorm ) {
+      String deliveryFree = "";
+      for ( int i = 0; i < 6; i++ ) {
+         deliveryFree = getDeliveryFreeText( rxNorm, i );
+         if ( deliveryFree.length() >= 4 || deliveryFree.equals( rxNorm ) ) {
+            break;
+         }
+         System.out.println( "Not yet .... " + rxNorm + " <<< " + deliveryFree );
+      }
+      return deliveryFree;
+   }
+
+
+   /**
+    * Joins the words left by {@link #getDeliveryFreeTexts(String, int)} with single spaces.
+    * Text without any space is returned untouched.  A non-empty result is printed to standard out.
+    *
+    * @param rxNorm       text to strip
+    * @param discardLevel passed through to the word extraction
+    * @return the joined words, or an empty string when at most one character remains
+    */
+   static public String getDeliveryFreeText( final String rxNorm, final int discardLevel ) {
+      if ( rxNorm.indexOf( ' ' ) < 0 ) {
+         return rxNorm;
+      }
+      final String[] words = getDeliveryFreeTexts( rxNorm, discardLevel );
+      if ( words.length == 0 ) {
+         return "";
+      }
+      final StringBuilder joined = new StringBuilder( words[0] );
+      for ( int i = 1; i < words.length; i++ ) {
+         joined.append( ' ' ).append( words[i] );
+      }
+      if ( joined.length() <= 1 ) {
+         return "";
+      }
+      final String result = joined.toString();
+      System.out.println( rxNorm + " --> " + result );
+      return result;
+   }
+
+   static private Collection<AttributeTextSpan> getAttributeTextSpans( final String rxNorm, final int discardLevel ) {
+      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+      if ( discardLevel < 1 ) {
+         for ( RxAttribute rxAttribute : SubstanceState.values() ) {
+            textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+         }
+      }
+      if ( discardLevel < 2 ) {
+         for ( RxAttribute rxAttribute : Strength.values() ) {
+            textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+         }
+      }
+      if ( discardLevel < 3 ) {
+         for ( RxAttribute rxAttribute : ReleaseModifier.values() ) {
+            textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+         }
+      }
+      if ( discardLevel < 4 ) {
+         for ( RxAttribute rxAttribute : Duration.values() ) {
+            textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+         }
+      }
+      if ( discardLevel < 5 ) {
+         for ( RxAttribute rxAttribute : NonThisThat.values() ) {
+            textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+         }
+      }
+      for ( RxAttribute rxAttribute : Route.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Delivery.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Coating.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Packaging.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Metering.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Pharmicopia.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Timing.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Administration.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : EndTime.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : LabelInstruction.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : FactorNumber.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      //      for ( RxAttribute rxAttribute : DosePer.values() ) {
+      //         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      //      }
+      for ( RxAttribute rxAttribute : Conjunction.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Parenthesis.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Dot.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : DeliveryUnwanted.values() ) {
+         textSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      final Collection<AttributeTextSpan> numberTextSpans = getNumberTextSpans( rxNorm );
+      textSpans.addAll( numberTextSpans );
+      final Collection<AttributeTextSpan> perTextSpans = new HashSet<>();
+      for ( RxAttribute rxAttribute : DosePer.values() ) {
+         perTextSpans.addAll( getAttributeTextSpans( rxNorm, rxAttribute ) );
+      }
+      textSpans.addAll( perTextSpans );
+      final Collection<AttributeTextSpan> unitTextSpans = getRxUnitAttributeTextSpans( rxNorm );
+      textSpans.addAll( unitTextSpans );
+      final Collection<AttributeTextSpan> complexUnitTextSpans = getComplexUnitTextSpans( rxNorm,
+                                                                                          perTextSpans,
+                                                                                          unitTextSpans );
+      textSpans.addAll( complexUnitTextSpans );
+      textSpans.addAll( getDosageAttributeTextSpans( rxNorm, numberTextSpans, unitTextSpans, complexUnitTextSpans ) );
+
+      textSpans.addAll( getRatioAttributeTextSpans( rxNorm ) );
+      return textSpans;
+   }
+
+
+   /**
+    * Finds every whole-word occurrence of the attribute's terms in the text.  An occurrence counts when it
+    * starts at index 0 or after a space, and ends at the last index or before a space.
+    *
+    * @param rxNorm      text to search
+    * @param rxAttribute attribute whose terms are searched for
+    * @return one span (inclusive begin and end indices) per occurrence
+    */
+   static private Collection<AttributeTextSpan> getAttributeTextSpans( final String rxNorm,
+                                                                       final RxAttribute rxAttribute ) {
+      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+      final int lastIndex = rxNorm.length() - 1;
+      for ( String term : rxAttribute.getTerms() ) {
+         if ( term.isEmpty() ) {
+            // an empty term is found again at the same index on every search, so the scan would never end
+            continue;
+         }
+         int begin = rxNorm.indexOf( term );
+         while ( begin >= 0 ) {
+            final int end = begin + term.length() - 1;
+            final boolean startsWord = begin == 0 || rxNorm.charAt( begin - 1 ) == ' ';
+            final boolean endsWord = end == lastIndex || rxNorm.charAt( end + 1 ) == ' ';
+            if ( startsWord && endsWord ) {
+               textSpans.add( new AttributeTextSpan( rxAttribute, begin, end ) );
+            }
+            // resume after this occurrence; overlapping occurrences are not considered
+            begin = rxNorm.indexOf( term, end + 1 );
+         }
+      }
+      return textSpans;
+   }
+
+
+   /**
+    * Terms for where or how a drug is applied, e.g. "oral", "topical", "left eye".
+    */
+   public static enum Route implements RxAttribute {
+      ORAL( "oral", "orodispersible", "orally", "per oral", "per os", "p . o ." ),
+      NON_ORAL( "non-oral", "non - oral" ),
+      SUBLIGUAL( "sublingual", "sublingually", "sl" ),
+      TOPICAL( "topical", "top", "application", "spot control", "spot treatment" ),
+      INHALANT( "inhalant", "inhalation", "inhaler", "inhal", "inhl",
+                "respiratory", "actuat", "nebulizer", "nebulizer cup" ),
+      INTRATRACHEAL( "intratracheal", "transtracheal", "endotracheal" ),
+      INTRAVENOUS( "intravenous", "intraven", "intravascular", "intra-articular", "intra-arterial" ),
+      INTRALESIONAL( "intralesional" ),
+      INTRASYNOVIAL( "intrasynovial" ),
+      INTRASPINAL( "intraspinal" ),
+      SUBCUTANEOUS( "subcutaneous", "subcutane", "subc", "subq", "subcut", "sc", "percutaneous", "sub-q", "sub-cu" ),
+      INTRAMUSCULAR( "intramuscular", "intramusc", "im" ),
+      INTRATHECAL( "intrathecal", "intrathec" ),
+      EYE( "eye", "ophthalmic", "ophth", "oph", "intraocular" ),
+      LEFT_EYE( "left eye", "o . s .", "oculus sinister" ),
+      RIGHT_EYE( "right eye", "oculus dexter", "o . d ." ),
+      BOTH_EYES( "both eyes", "o_2", "o2", "o . u .", "oculus uterque" ),
+      EAR( "ear", "otic", "auricular" ),
+      LEFT_EAR( "left ear", "a . l .", "a . s .", "aurio laeva", "aurio sinister" ),
+      RIGHT_EAR( "right ear", "a . d .", "aurio dextra" ),
+      BOTH_EARS( "both ears", "a . u .", "auris utrae" ),
+      NASAL( "nasal", "nose", "intranasal", "rhinal", "mucous mem", "mucous membrane", "transmucosal", "oromucosal" ),
+      RECTAL( "rectal", "rectally", "r", "rectum", "p . r ." ),
+      URETHRAL( "urethral" ),
+      VAGINAL( "vaginal", "vaginally", "vag" ),
+      UTERINE( "intrauterine" ),
+      DENTAL( "dental", "dent", "dentifrice" ),
+      LARYNGEAL( "laryngeal" ),
+      SUBARACHNOID( "subarachnoid" ),
+      INTRACARDIAC( "intracardiac" ),
+      EPIDURAL( "epidural" ),
+      DERMAL( "dermal", "transdermal", "transderm", "intradermal", "intraderm", "id", "cutaneous" ), // skin
+      HYPODERMIC( "hypodermic", "h" ),
+      HAND( "hand" ),
+      FOOT( "foot" ),
+      INTRAPERI( "intraperitoneal", "ip" ),
+      SYSTEMIC( "systemic" ),
+      NON_SYSTEMIC( "non-systemic" ),
+      UNKNOWN( "unknown delivery location", "multiple routes", "route not applicable",
+               "not applicable implant system" );
+
+      private final String[] __terms;
+
+      private Route( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the terms given to this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   // Some of these could also be units: ampule, vial
+   /**
+    * Terms for the physical form or device that delivers a drug, e.g. "capsule", "injection", "patch".
+    * Constants created with {@code hasPlural == true} also get each term with an "s" appended.
+    */
+   public static enum Delivery implements RxAttribute {
+      CAPSULE( true, "capsule", "capsula", "caplet", "gelcap", "cap", "liquicap", "particles cap" ),
+      BEAD( true, "bead" ),
+      MICROSPHERE( true, "microsphere" ),
+      TABLET( true, "tablet", "tab", "tabella", "pill", "odt" ),
+      CHEW( true, "chewable", "chew", "chewing", "gum" ),
+      LOZENGE( true, "lozenge", "troche", "trochiscus" ),
+      INHALANT( true, "inhalant", "nebulization" ),
+      DISPERSABLE( true, "dispersable" ),
+      IV( false, "i.v.", "i . v .", "i . v", "iv" ),
+      IV_PIGGYBACK( false, "intravenous piggyback", "ivpb" ),
+      IV_PUSH( false, "intravenous push", "ivp" ),
+      INJECTION( true, "injection", "syringe", "needle", "injectable", "injectio",
+                 "infiltration", "injector", "inj", "pen" ),
+      INSERT( true, "insert" ),
+      AMPULE( true, "ampule", "ampul", "ampoule" ),
+      VIAL( true, "vial", "vil" ),
+      IRRIGATION( true, "irrigation", "irrigant" ),
+      DROPPER( true, "dropperette", "drop dispenser", "drops" ),
+      MOUTHWASH( false, "mouthwash" ),
+      TOOTHPASTE( true, "toothpaste" ),
+      PELLET( true, "pellet" ),
+      BALM( true, "balm", "liniment", "linimentum", "lin" ),
+      STICK( true, "stick" ),
+      PATCH( false, "patches", "patch" ),
+      CLOTH( true, "cloth", "pad", "swab", "swabstick", "wipe", "towelette" ),
+      SOAP( true, "soap", "body wash", "wash", "cleanser", "skin cleanser", "cleaner",
+            "lotion / shampoo", "shampoo", "shampoo and conditioner", "rinse", "shaving" ),
+      MASK( true, "mask" ),
+      ENEMA( true, "enema" ),
+      SCRUB( true, "scrub", "surgical scrub" ),
+      DRINK( false, "drink" ), // tea
+      SUPPOSITORY( false, "suppository", "suppositories", "suppositorium", "suppos", "supp" ),
+      APPLICATOR( true, "applicator" ),
+      IMPLANT( true, "implant" ),
+      BATH( false, "bath" ),
+      DOUCHE( false, "douche" ),
+      BAR( true, "bar" ),
+      WAFER( true, "wafer" ),
+      FLAKE( true, "flake" ),
+      SPONGE( true, "sponge" ),
+      BANDAGE( true, "bandage", "tape", "dressing" ),
+      STRIP( true, "strip" ),
+      PUMP( true, "pump" ),
+      TUBE( true, "tube", "long tube", "squeeze tube" ),
+      THREADED_PORT( true, "threaded port" ),
+      DISK( true, "disk device", "disk" ),
+      DEVICE( true, "device", "device assisted" ),
+      SYSTEM( true, "drug delivery system" ),
+      UNKNOWN( false, "unknown delivery form" );
+
+      private final String[] __terms;
+
+      /**
+       * @param hasPlural when true, every term is stored twice: first with "s" appended, then as given
+       * @param terms     the base terms
+       */
+      private Delivery( final boolean hasPlural, final String... terms ) {
+         if ( !hasPlural ) {
+            __terms = terms;
+         } else {
+            final String[] expanded = new String[2 * terms.length];
+            for ( int i = 0; i < terms.length; i++ ) {
+               expanded[2 * i] = terms[i] + "s";
+               expanded[2 * i + 1] = terms[i];
+            }
+            __terms = expanded;
+         }
+      }
+
+      /**
+       * @return the terms of this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms describing how a drug is released over time, e.g. "extended release", "xr".
+    */
+   public static enum ReleaseModifier implements RxAttribute {
+      SUSTAINED_RELEASE( "sustained release", "slow release", "slow - release", "sr", "sustained action", "sa" ),
+      EXTENDED_RELEASE( "extended release", "extended - release", "extended rele",
+                        "er", "xl", "xr", "xt", "entended release" ),
+      CONTROLLED_RELEASE( "controlled release", "controlled - release", "cr", "controlled delivery", "cd" ),
+      TIMED_RELEASE( "timed release", "time release", "tr" ),
+      DELAYED_RELEASE( "delayed release" ), // "dr"
+      MULTI_RELEASE( "multiphasic release", "multiphase" ),
+      MODIFIED_RELEASE( "modified release" ),
+      LONG_ACTING( "long acting", "long - acting", "la", "sustained action", "sustained release" ),
+      RAPID_DISSOLVE( "rapid dissolve", "quick dissolve" ),
+      DISINTEGRATING( "disintegrating" ),
+      INSTANT( "instant", "rapid release", "immediate release", "immediate - release", "im" );
+
+      private final String[] __terms;
+
+      private ReleaseModifier( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the terms given to this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms describing a coating, e.g. "enteric coated", "film coated".
+    */
+   public static enum Coating implements RxAttribute {
+      ENTERIC_COATED( "enteric coated", "enteric - coated", "enteric", "ec" ),
+      GELATIN_COATED( "gelatin coated" ),
+      FILM_COATED( "film coated" ),
+      GENERIC_COATED( "coated" );
+
+      private final String[] __terms;
+
+      private Coating( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the terms given to this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Product qualifier terms such as "extra strength", "children's", "sterile".
+    */
+   public static enum Strength implements RxAttribute {
+      ADVANCED( "advanced" ),
+      CLEANSING( "cleansing" ),
+      ANTISEPTIC( "antiseptic" ),
+      ANTIBACTERIAL( "antibacterial" ),
+      ANTICAVITY( "anticavity" ),
+      MEDICATED( "medicated" ),
+      STERILE( "sterile" ),
+      MULTI_SYMPTOM( "multi - symptom", "multi-symptom" ),
+      //      WOMEN( "women" ),
+      //      MEN( "men" ),
+      ADULT( "adult" ),
+      TEEN( "teen", "adolescent" ),
+      CHILD( "children", "children's", "children ' s", "childrens", "child",
+             "pediatric", "paediatric", "infants '", "infants", "infant" ),
+      BABY( "baby" ),
+      PRENATAL( "prenatal", "pn" ),
+      PLUS( "plus" ),
+      SEVERE( "severe" ),
+      REGULAR( "regular strength", "regular", "original strength", "classic" ),
+      HALF_STRENGTH( "half strength", "1 / 2 strength" ),
+      FULL_STRENGTH( "full strength" ),
+      DOUBLE_STRENGTH( "double strength" ),
+      EXTRA_STRENGTH( "extra strength" ),
+      MAX_STRENGTH( "maximum strength", "maximum", "max", "maximum relief" ),
+      ULTRA_STRENGTH( "ultra strength", "ultra" ),
+      VARYING_STRENGTH( "varying strength" ),
+      UNSPEC_STRENGTH( "str unspec", "no str" );
+
+      private final String[] __terms;
+
+      private Strength( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the terms given to this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms for an hour-count duration: "8 hour", "12 hr", "24 hours" and similar.
+    */
+   public static enum Duration implements RxAttribute {
+      // ALL_DAY( "all day" ),
+      EIGHT_HOUR( "8 hour", "8 hr", "8hr" ),
+      TWELVE_HOUR( "12 hour", "12 hr", "12hr" ),      // SR ?
+      TWENTYFOUR_HOUR( "24 hour", "24 hr", "24hr", "24 hours" );  // ER ?
+
+      private final String[] __terms;
+
+      private Duration( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the terms given to this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   static public enum Frequency implements RxAttribute {
+      EVERY_HOUR( "hourly", "per hour", "every hour", "q . h .", "quaque hora" ),
+      OTHER_HOUR( "every other hour", "alt . h .", "alternis horis" ),
+      FOUR_HOURS( "every four hours", "qqh", "q . q . h .", "quater quaque hora" ),
+      TWICE( "twice", "bis" ),
+      DAILY( "daily", "per day", "every day", "quaque die" ),
+      TWICE_DAILY( "twice per day", "twice a day", "twice daily", "b . i . d  .", "bis in die" ),
+      THREE_DAILY( "three times per day",
+                   "three times a day",
+                   "t . i . d .",
+                   "ter in die",
+                   "t . d . s .",
+                   "ter die sumendum" ),
+      FOUR_DAILY( "four times per day", "four times a day", "q . i . d .", "quater in die" ),
+      EVERY_MORNING( "every morning", "every day before noon", "q . a . m .", "quaque die ante meridiem" ),
+      OTHER_DAY( "every other day",
+                 "dieb . alt .",
+                 "diebus alternis",
+                 "q . a . d .",
+                 "quoque alternis die",
+                 "q . o . d ." ),
+      WEEKLY( "weekly", "per week", "every week" ),
+      THREE_WEEKLY( "three times per week", "three times a week", "t . i . w ." ),
+      BIWEEKLY( "biweekly" ),
+      REPEAT( "repeat", "repeats", "rep", "rept", "repetatur" ),
+      NO_REPEAT( "no repeat", "no repeats", "non rep", "non repetatur" ),
+      EVERY( "every", "q", "quaque" ),
+      TIMES( "times" ) // also x
+      // need to note "every # hours" as "q . # h" "quaque # hora"
+      ;
+      final private String[] __terms;
+
+      private Frequency( final String... terms ) {
+         __terms = terms;
+      }
+
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms of the "free of / non-" kind, e.g. "alcohol free", "non drowsy".
+    * Every term is stored as given and again with " formula" appended.
+    */
+   public static enum NonThisThat implements RxAttribute {
+      NON_CAKE( "non-caking", "non - caking" ),
+      NO_PRESERVE( "preservative free", "preservative - free" ),
+      NO_ALCOHOL( "alcohol free" ),
+      NON_CFC( "cfc free" ),
+      NON_DROWSY( "non drowsy", "non-drowsy", "non - drowsy", "no drowsiness" );
+      //      NONE( "no delivery form modifier" );
+
+      private final String[] __terms;
+
+      private NonThisThat( final String... terms ) {
+         final String[] expanded = new String[2 * terms.length];
+         for ( int i = 0; i < terms.length; i++ ) {
+            expanded[2 * i] = terms[i];
+            expanded[2 * i + 1] = terms[i] + " formula";
+         }
+         __terms = expanded;
+      }
+
+      /**
+       * @return the expanded terms of this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms describing packaging, e.g. "prefilled", "kit", "bottle", "packet".
+    */
+   public static enum Packaging implements RxAttribute {
+      PREFILLED( "prefilled", "pre-filled" ),
+      FILLED( "filled" ),
+      REFILL( "refill", "refilled" ),
+      EMPTY( "empty" ),
+      BULK( "pharmacy bulk", "bulk" ),
+      KIT( "kit", "kits", "patient starter kit" ),
+      TITRATION_KIT( "titration kit", "patient titration kit" ),
+      BOX( "box" ),
+      BOTTLE( "bottle" ),
+      BAG( "plastic bag", "dehp - free bag" ),
+      CARTRIDGE( "cartridge" ),
+      ADAPTER( "adapter" ),
+      PACKET( "packet", "package", "pack", "pak", "pkt", "steripack", "patient pack" ),
+      RESERVOIR( "pump reservoir", "reservoir" ),
+      NON_REFRIGERATED( "non-refrigerated" ),
+      DISPOSABLE( "disposable" ),
+      DEGRADABLE( "degradable" );
+
+      private final String[] __terms;
+
+      private Packaging( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the terms given to this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms describing the state or preparation of a substance, e.g. "liquid", "cream", "powder".
+    * Constants created with {@code hasPlural == true} also get each term with an "s" appended.
+    */
+   public static enum SubstanceState implements RxAttribute {
+      MIX( false, "mix", "misce", "mistura", "mist" ), // also m
+      PIGGYBACK( false, "piggyback premix frozen", "piggyback frozen premix", "piggyback" ),
+      CONCENTRATE( false, "concentrated", "concentrate", "conc" ),
+      //      COMPOUND( false, "compound", "comp" ),
+      RECONSTITUTE( false, "reconstitution", "reconstituted", "reconst" ),
+      FREEZE_DRIED( false, "lyophilized" ),
+      MICRONIZED( false, "micronized" ),
+      //      PREPARATION( "preparation" ), -- needed
+      //      EXTRACT( "extract" ), -- needed
+      LIQUID( true, "liquid", "liquor", "liqui", "liqd", "liq", "fluid", "fld", "fl",
+              "syrup", "syr", "syrupus", "elixir", "elix" ),
+      LIQUIFIED( false, "liquified" ),
+      TINCTURE( true, "tincture", "tr", "tinc", "tinct" ),
+      SOLUTION( true, "solution", "solutio", "soln", "sol", "soluble", "solutuion" ),
+      SUSPENSION( true, "suspension", "susp", "supspension" ),
+      GAS( false, "gas" ),
+      AEROSOL( true, "aerosol", "dispersible", "disp",
+               "breath activated" ), // could also be mist, but conflicts with mix
+      NON_AEROSOL( true, "non-aerosol", "non - aerosol" ),
+      WAX( false, "wax" ),
+      //      OIL( true, "oil" ),
+      CREAM( true, "cream", "creme", "crm", "cr", "oily", "lotion", "lot .", "film", "creamy" ),
+      OINTMENT( true, "ointment", "ointmen", "oin", "oint", "unguentum", "ung" ),
+      PASTE( true, "paste" ),    // toothpaste ?
+      GEL( true, "gelatin", "gel / jelly", "gel", "jelly", "jel", "softgel", "liquigel" ),
+      EMULSION( true, "emulsion", "emulsum", "emuls", "emul", "microemulsion" ),
+      INFUSION( true, "infusion" ),
+      COLLOID( true, "colloidal dispersion", "colloidal" ),
+      LIPID( false, "lipid complex", "lipid formulation" ),
+      DILUTE( false, "dilute", "dil ." ),
+      MODIFIED( false, "modified" ),
+      DRIED( false, "dried" ),
+      ANHYDROUS( false, "anhydrous" ),
+      FOAM( true, "foaming cleanser", "foaming", "foam" ),
+      SPRAY( true, "aqua - spray", "spray" ),
+      POWDER( true, "dry powder", "powdered", "powder - like", "powder", "pwdr", "powd",
+              "pulvis", "pulv", "pdr for recon", "granule" ),//, "dust" ),
+      CRYSTAL( true, "crystal" ),
+      EFFERVESCENT( false, "effervescent" ),                 // remove on multiple ?
+      NON_EFFERVESC( false, "non-effervescent", "non-efervescent", "non-effervess", "non-efervescen",
+                     "non - effervescent", "non - efervescent", "non - effervess", "non - efervescen" ),
+      DISSOLVING( false, "dissolving" ),
+      SOLVENT( true, "solvent" ),
+      IN_WATER( false, "in water", "ex aqua", "ex aq" ),
+      IN_OIL( false, "in oil" ),
+      //      DEXTROSE_SOL( false, "dextrose 5 %", "d5w" ),
+      //      DEXTROSE_SALINE( false, "dextrose 5 % in saline", "dextrose 5 % in normal saline", "d5ns" ),
+      //      SALINE( false, "normal saline", "ns" ),
+      //      HALF_SALINE( false, "half normal saline", "1/2ns", "1 / 2 ns", "1 / 2ns" ),
+      HARD_SOFT( false, "hard soft etc", "hard , soft , etc" ),
+      ADDITIVE( true, "additive", "sdv , mdv or additive" ),
+      MULTI_LAYER( true, "multilayer" );
+
+      private final String[] __terms;
+
+      /**
+       * @param hasPlural when true, every term is stored twice: first with "s" appended, then as given
+       * @param terms     the base terms
+       */
+      private SubstanceState( final boolean hasPlural, final String... terms ) {
+         if ( !hasPlural ) {
+            __terms = terms;
+         } else {
+            final String[] expanded = new String[2 * terms.length];
+            for ( int i = 0; i < terms.length; i++ ) {
+               expanded[2 * i] = terms[i] + "s";
+               expanded[2 * i + 1] = terms[i];
+            }
+            __terms = expanded;
+         }
+      }
+
+      /**
+       * @return the terms of this constant; the array is the internal one, not a copy
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   // TODO merge with Administration
+   /**
+    * Metering-related dictionary terms (e.g. "metered - dose", "bolus", "dose").
+    * Multi-token terms are written with spaces around punctuation, exactly as listed below.
+    */
+   public static enum Metering implements RxAttribute {
+      FINAL_DOSE( "final dosage form", "final dose form" ),
+      METERED( "metered - dose pump", "metered - dose", "metered" ),
+      EACH( "each", "ea", "single - use" ),
+      IN_ONE( "in one", "in 1",
+              "as one", "as 1",
+              "large single", "one large",
+              "bol .", "bolus",
+              "1dose" ),
+      MINIMUM( "minimum", "min", "a minimum" ),  // also m
+      SUFFICIENT( "sufficient quantity", "q . s .", "quantum sufficiat" ),
+      TITRADOSE( "titradose" ),
+      STATDOSE( "statdose" ),
+      DOSE( "dose" );
+
+      // Surface forms of this constant, stored exactly as passed to the constructor
+      private final String[] __terms;
+
+      Metering( final String... surfaceForms ) {
+         __terms = surfaceForms;
+      }
+
+      /**
+       * @return the surface forms given for this constant (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Compendium / standards designations that can appear in a drug name.
+    * <p/>
+    * For each compendium the official spelling is listed together with the spelling variants that
+    * were already recognized ("pharmacopoeia" / "pharmacopeia" / the misspelling "pharmacopia"),
+    * so that the designation is matched however it is written.
+    */
+   static public enum Pharmicopia implements RxAttribute {
+      US_PHARMACOPEIA( "united states pharmacopoeia",
+                       "united states pharmacopeia",   // official (American) spelling
+                       "united states pharmacopia",    // misspelling, kept for matching
+                       "usp",
+                       "u.s.p.",
+                       "u . s . p .",
+                       "u . s . p" ),
+      BRIT_PHARMACOPIA( "british pharmacopoeia",       // official (British) spelling
+                        "british pharmacopia",         // misspelling, kept for matching
+                        "bpc" ), //, "bp"
+      INTERNATIONAL( "international", "intl" );
+      // Surface forms of this constant, stored exactly as passed to the constructor
+      final private String[] __terms;
+
+      private Pharmicopia( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the surface forms given for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   //
+   //       DOSAGE
+   //
+
+   /**
+    * Timing terms: meal-relative ("before meal", "p . c ."), time-of-day ("morning", "at bedtime")
+    * and "around the clock", each with the abbreviations and Latin forms listed below.
+    */
+   public static enum Timing implements RxAttribute {
+      //      IMMEDIATELY( "immediately", "stat", "statim" ),
+      BEFORE_MEAL( "before meal", "before meals",
+                   "a . c .", "ante cibum" ),
+      AFTER_MEAL( "after meal", "after meals",
+                  "p . c .", "post cibum" ),
+      FOOD( "food", "cibos", "with food", "cum cibos", "cf" ),  // also "c" and "cc"
+      MORNING( "morning", "before noon",
+               "a . m .", "ante meridiem" ),
+      AFTERNOON( "afternoon", "after noon",
+                 "p . m .", "post meridiem" ),
+      EVENING( "evening" ),
+      NIGHT_TIME( "at night", "noct", "nocte", "nighttime", "night time" ),
+      BEDTIME( "at bedtime", "at bed time",
+               "h . s .", "hora somni" ),
+      DAYTIME( "day time", "daytime" ),
+      AROUND_THE_CLOCK( "around the clock", "a . t . c ." );
+
+      // Surface forms of this constant, stored exactly as passed to the constructor
+      private final String[] __terms;
+
+      Timing( final String... surfaceForms ) {
+         __terms = surfaceForms;
+      }
+
+      /**
+       * @return the surface forms given for this constant (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   /**
+    * Administration instructions ("as needed", "as directed", "not to exceed", ...) with their
+    * prescription abbreviations and Latin expansions.  Abbreviations are written with spaces around
+    * the periods, exactly as listed below.
+    */
+   static public enum Administration implements RxAttribute {
+      EACH( "ana", "of each" ), // "aa",
+      UP_TO( "ad", "up to" ),
+      AS_DESIRED( "as desired", "ad lib", "ad libitum", "freely", "as much as one desires" ),
+      AS_NEEDED( "as needed", "prn", "p . r . n .", "pro re nata" ),
+      IF_NEEDED( "if needed", "if there is a need", "s . o . s .", "si op . sit", "si op sit", "si opus sit" ),
+      AS_DIRECTED( "as directed", "e . m . p .", "ex modo prescripto", "ud", "u . d .", "ut dictum" ),
+      AS_WRITTEN( "as written", "dispense as written", "d . a . w ." ),
+      // "s . a ." expands to "secundum artem"; the misspelling "secundum artum" is kept so that
+      // anything it matched before is still matched
+      AS_JUDGED( "use your judgement", "s . a .", "secundum artem", "secundum artum" ),
+      SUCH_DOSE( "give of such doses", "d . t . d .", "dentur tales doses" ),
+      NOT_EXCEEDING( "not exceeding", "not to exceed", "n . t . e ." ),
+      PATIENT_CONTROL( "patient controlled", "patient administration" ),
+      MANAGEMENT_USE( "medication management use" );   // "mmu"
+      // Surface forms of this constant, stored exactly as passed to the constructor
+      final private String[] __terms;
+
+      private Administration( final String... terms ) {
+         __terms = terms;
+      }
+
+      /**
+       * @return the surface forms given for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms that signal the end of a medication: "discontinue" and its abbreviations.
+    */
+   public static enum EndTime implements RxAttribute {
+      DISCONTINUE( "discontinue",
+                   "dc",
+                   "d / c",
+                   "disc" );
+
+      // Surface forms of this constant, stored exactly as passed to the constructor
+      private final String[] __terms;
+
+      EndTime( final String... surfaceForms ) {
+         __terms = surfaceForms;
+      }
+
+      /**
+       * @return the surface forms given for this constant (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Labeling and dispensing instruction terms ("label as such", "dispense", "divide", ...)
+    * with the abbreviations and Latin forms listed below.
+    */
+   public static enum LabelInstruction implements RxAttribute {
+      LABEL_AS_SUCH( "label as such", "l . a . s ." ),
+      WRITE_ON_LABEL( "write on label", "sig" ),
+      APPLY( "apply",
+             "admov", "admove" ),
+      DISPENSE( "dispense", "disp ." ),
+      STIR( "stir", "shake",
+            "agit", "agita" ),
+      AMOUNT( "amount", "amt" ),
+      DIVIDE( "divide", "div ." ),
+      SEND( "send", "mitte" );
+
+      // Surface forms of this constant, stored exactly as passed to the constructor
+      private final String[] __terms;
+
+      LabelInstruction( final String... surfaceForms ) {
+         __terms = surfaceForms;
+      }
+
+      /**
+       * @return the surface forms given for this constant (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   /**
+    * Number words that scale a quantity: "hundred" through "billion", plus "half" and its
+    * abbreviations.
+    */
+   public static enum FactorNumber implements RxAttribute {
+      HUNDRED( "hundred" ),
+      THOUSAND( "thousand" ),
+      MILLION( "million" ),
+      BILLION( "billion" ),
+      HALF( "half",
+            "ss",
+            "semis" );
+
+      // Surface forms of this constant, stored exactly as passed to the constructor
+      private final String[] __terms;
+
+      FactorNumber( final String... surfaceForms ) {
+         __terms = surfaceForms;
+      }
+
+      /**
+       * @return the surface forms given for this constant (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Attribute built at runtime for a compound unit found in text; it simply carries the
+    * text it was created with.
+    */
+   private static class RxComplexUnit implements RxAttribute, RxUnit {
+
+      // Text of the compound unit, stored exactly as supplied
+      private final String[] __terms;
+
+      private RxComplexUnit( final String... unitText ) {
+         __terms = unitText;
+      }
+
+      /**
+       * @return the text supplied at construction (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+
+   }
+
+   /**
+    * Attribute built at runtime for a dose found in text; it simply carries the text it was
+    * created with.
+    */
+   private static class RxDose implements RxAttribute {
+
+      // Text of the dose, stored exactly as supplied
+      private final String[] __terms;
+
+      private RxDose( final String... doseText ) {
+         __terms = doseText;
+      }
+
+      /**
+       * @return the text supplied at construction (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Attribute built at runtime for a ratio found in text; it simply carries the text it was
+    * created with.
+    */
+   private static class RxRatio implements RxAttribute {
+
+      // Text of the ratio, stored exactly as supplied
+      private final String[] __terms;
+
+      private RxRatio( final String... ratioText ) {
+         __terms = ratioText;
+      }
+
+      /**
+       * @return the text supplied at construction (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Attribute built at runtime for a number found in text; it simply carries the text it was
+    * created with.
+    */
+   private static class RxNumber implements RxAttribute {
+
+      // Text of the number, stored exactly as supplied
+      private final String[] __terms;
+
+      private RxNumber( final String... numberText ) {
+         __terms = numberText;
+      }
+
+      /**
+       * @return the text supplied at construction (the internal array, not a copy)
+       */
+      @Override
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   //   static private Collection<AttributeTextSpan> getDosageAttributeTextSpansOld( final String rxNorm ) {
+   //      int doseBegin = -1;
+   //      int doseEnd = -1;
+   //      int numberableBegin = -1;
+   //      int wordBegin = 0;
+   //      int wordEnd = rxNorm.indexOf( ' ' );
+   //      if ( wordEnd < 0 ) {
+   //         wordEnd = rxNorm.length();
+   //      }
+   //      final Collection<AttributeTextSpan> dosageSpans = new HashSet<>();
+   //      boolean haveDigit = false;
+   //      boolean haveRxUnit = false;
+   //      while ( wordBegin >= 0 ) {
+   //         final String word = rxNorm.substring( wordBegin, wordEnd );
+   //         final boolean hasDigit = hasDigit( word );
+   //         if ( hasDigit ) {
+   //            haveDigit = true;
+   //            numberableBegin = -1;
+   //         }
+   ////         final boolean hasRxUnit = hasRxUnit( word );
+   ////         if ( hasRxUnit ) {
+   ////            haveRxUnit = true;
+   ////            numberableBegin = -1;
+   ////         }
+   //         final int rxUnitEnd = getRxUnitEnd( rxNorm, wordBegin );
+   //         final boolean hasRxUnit = rxUnitEnd > 0;
+   //         if ( hasRxUnit ) {
+   //            haveRxUnit = true;
+   //            numberableBegin = -1;
+   //            wordEnd = rxUnitEnd;
+   //         }
+   //
+   //         final boolean isNumberable = isNumberable( word );
+   //         if ( hasDigit || isNumberable || hasRxUnit ) {
+   //            if ( doseBegin < 0 ) {
+   //               doseBegin = wordBegin;
+   //            }
+   //            doseEnd = wordEnd;
+   //            if ( isNumberable ) {
+   //               numberableBegin = wordBegin;
+   //            }
+   //         }
+   //         if ( (!hasDigit && !isNumberable && !hasRxUnit) || wordEnd == rxNorm.length() ) {
+   //            if ( haveDigit && haveRxUnit ) {
+   //               if ( numberableBegin > 0 ) {
+   //                  doseEnd = numberableBegin-1;
+   //               }
+   //               final RxDose rxDose = new RxDose( rxNorm.substring( doseBegin, doseEnd ) );
+   //               final AttributeTextSpan attributeTextSpan = new AttributeTextSpan( rxDose, doseBegin, doseEnd-1 );
+   //               dosageSpans.add( attributeTextSpan );
+   //            }
+   //            haveDigit = false;
+   //            haveRxUnit = false;
+   //            doseBegin = -1;
+   //            doseEnd = -1;
+   //            numberableBegin = -1;
+   //         }
+   //         wordBegin = wordEnd + 1;
+   //         if ( wordBegin >= rxNorm.length() ) {
+   //            break;
+   //         }
+   //         wordEnd = rxNorm.indexOf( ' ', wordBegin );
+   //         if ( wordEnd < 0 ) {
+   //            wordEnd = rxNorm.length();
+   //         }
+   //      }
+   //      return dosageSpans;
+   //   }
+   //
+   //   static private boolean hasDigit( final String word ) {
+   //      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+   //      textSpans.addAll( getNumberRxUnit( word ) );
+   //      for ( RxAttribute rxAttribute : DosePer.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Dot.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      final char[] chars = word.toCharArray();
+   //      for ( TextSpan textSpan : textSpans ) {
+   //         for ( int i=textSpan.__begin; i<=textSpan.__end; i++ ) {
+   //            chars[i] = ' ';
+   //         }
+   //      }
+   //      boolean haveDigit = false;
+   //      for ( char c : chars ) {
+   //         if ( c != ' ' && !Character.isDigit( c ) ) {
+   //            return false;
+   //         }
+   //         haveDigit = true;
+   //      }
+   //      return haveDigit;
+   //   }
+
+   static private boolean isOnlyDot( final String word ) {
+      for ( RxAttribute dot : Dot.values() ) {
+         for ( String dotText : dot.getTerms() ) {
+            if ( word.equals( dotText ) ) {
+               return true;
+            }
+         }
+      }
+      return false;
+   }
+
+   /**
+    * Checks whether a token is a non-empty run of digits.
+    * <p/>
+    * An empty token is rejected: callers split text on single spaces, so consecutive (or leading)
+    * spaces yield empty tokens, and treating those as numbers would start zero-length number spans.
+    *
+    * @param word a single token
+    * @return true if the token has at least one character and every character is a digit
+    * according to {@link Character#isDigit(char)}
+    */
+   static private boolean isOnlyDigits( final String word ) {
+      if ( word.isEmpty() ) {
+         // nothing to inspect, so there are no digits
+         return false;
+      }
+      for ( int i = 0; i < word.length(); i++ ) {
+         if ( !Character.isDigit( word.charAt( i ) ) ) {
+            return false;
+         }
+      }
+      return true;
+   }
+
+   static private boolean isOnlyDivider( final String word ) {
+      for ( RxAttribute dosePer : DosePer.values() ) {
+         for ( String divider : dosePer.getTerms() ) {
+            if ( word.equals( divider ) ) {
+               return true;
+            }
+         }
+      }
+      return false;
+   }
+
+   //   static private boolean hasRxUnit( final String word ) {
+   //      return !getRxUnit( word ).isEmpty() || !getNumberRxUnit( word ).isEmpty();
+   //   }
+
+   //   static private int getRxUnitEnd( final String rxNorm, final int startIndex ) {
+   //      final String suffix = rxNorm.substring( startIndex ).trim();
+   //      final Collection<AttributeTextSpan> textSpans = getRxUnit( suffix );
+   //      textSpans.addAll( getNumberRxUnit( suffix ) );
+   //      int maxEnd = -1;
+   //      for ( AttributeTextSpan textSpan : textSpans ) {
+   //         if ( textSpan.getBegin() == startIndex ) {
+   //            maxEnd = Math.max( maxEnd, textSpan.getEnd() );
+   //         }
+   //      }
+   //      return maxEnd;
+   //   }
+   //
+   //
+   //   static private Collection<AttributeTextSpan> getRxUnit( final String word ) {
+   //      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+   //      for ( RxAttribute rxAttribute : FactorNumber.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Mass.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Volume.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Rads.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : ConcUnit.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : TimeUnit.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Percent.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : StrengthUnit.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : UnitMisc.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Coding.values() ) {
+   //         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      return textSpans;
+   //   }
+   //
+   //   static private Collection<AttributeTextSpan> getNumberRxUnit( final String word ) {
+   //      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+   //      for ( RxAttribute rxAttribute : FactorNumber.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Mass.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Volume.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Rads.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : ConcUnit.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : TimeUnit.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Percent.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : StrengthUnit.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : UnitMisc.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      for ( RxAttribute rxAttribute : Coding.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      return textSpans;
+   //   }
+   //
+   //
+   //
+   //   static private Collection<AttributeTextSpan> getNumberAttributeTextSpans( final String word,
+   //                                                                       final RxAttribute rxAttribute ) {
+   //      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+   //      for ( String term : rxAttribute.getTerms() ) {
+   //         int lastBegin = 0;
+   //         int begin = word.indexOf( term );
+   //         int end = begin+term.length();
+   //         while ( begin >= 0 && begin >= lastBegin
+   //               && (begin == 0 || Character.isDigit( word.charAt( begin-1 ) )
+   //               // / and - can be per, x can be a dimension, k can be thousand
+   //               || word.charAt( begin-1 ) == '/' || word.charAt( begin-1 ) == '-'
+   //               || word.charAt( begin-1 ) == 'x')
+   //               && (end == word.length() || Character.isDigit( word.charAt( end ) )
+   //               || word.charAt( end ) == '/' || word.charAt( end ) == '-'
+   //               || word.charAt( end ) == 'x' || word.charAt( end ) == 'k') ) {
+   //            textSpans.add( new AttributeTextSpan( rxAttribute, begin, end-1 ) );
+   //            lastBegin = begin;
+   //            begin = word.indexOf( term, end+1 );
+   //            end = begin + term.length()-1;
+   //         }
+   //      }
+   //      return textSpans;
+   //   }
+
+   //   static private boolean isNumberable( final String word ) {
+   //      return isDosePer( word ) || isDot( word );
+   //   }
+   //
+   //   static private boolean isDosePer( final String word ) {
+   //      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+   //      for ( RxAttribute rxAttribute : DosePer.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      return !textSpans.isEmpty();
+   //   }
+   //
+   //   static private boolean isDot( final String word ) {
+   //      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+   //      for ( RxAttribute rxAttribute : Dot.values() ) {
+   //         textSpans.addAll( getNumberAttributeTextSpans( word, rxAttribute ) );
+   //      }
+   //      return !textSpans.isEmpty();
+   //   }
+
+
+   /**
+    * Finds numbers in text whose tokens are separated by single spaces.
+    * <p/>
+    * The text is walked one space-delimited word at a time.  A number starts at an all-digit word and can be
+    * extended across a "," word followed by a 3-digit word (e.g. "1 , 000") or across a single "." word followed
+    * by an all-digit word (e.g. "1 . 5").  A pending number is emitted when a later word shows that it has ended:
+    * another number directly after it, a misplaced comma or dot, or any other word.
+    * <p/>
+    * NOTE(review): spans are only added inside the loop, when a following word closes the number, so a number that
+    * is still pending when the text ends appears never to be returned - confirm that this is intended.
+    *
+    * @param rxNorm text to scan; words are taken to be the substrings between single space characters
+    * @return a span for each emitted number; the span end offset is inclusive and the span's attribute carries
+    * the trimmed number text
+    */
+   static private Collection<AttributeTextSpan> getNumberTextSpans( final String rxNorm ) {
+      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+      // [wordBegin, wordEnd) is the current word; wordEnd is the index of the next space or the text length
+      int wordBegin = 0;
+      int wordEnd = rxNorm.indexOf( ' ' );
+      if ( wordEnd < 0 ) {
+         wordEnd = rxNorm.length();
+      }
+      // [numberBegin, numberEnd) is the pending number; only meaningful while haveNumber is true
+      int numberBegin = -1;
+      int numberEnd = -1;
+      boolean haveNumber = false;
+      // state describing the previous word / the pending number
+      boolean wasOnlyDigits = false;
+      boolean wasComma = false;
+      boolean haveComma = false;
+      boolean haveDecimal = false;
+      // length of the most recent all-digit word, used to validate "1 , 000 ." style grouping
+      int lastDigitCount = 0;
+      while ( wordBegin >= 0 ) {
+         // newNumber: this word starts a number;  addNumber: the pending number must be emitted
+         boolean newNumber = false;
+         boolean addNumber = false;
+         final String word = rxNorm.substring( wordBegin, wordEnd );
+         final boolean isOnlyDigits = isOnlyDigits( word );
+         final boolean isComma = word.equals( "," );
+         final boolean isDecimal = word.equals( "." );
+         if ( isOnlyDigits ) {
+            if ( !haveNumber ) {
+               // new number, set the flag
+               newNumber = true;
+            } else if ( wasOnlyDigits ) {
+               // second number in a row, set the add flag and the new flag
+               addNumber = true;
+               newNumber = true;
+            } else if ( wasComma && word.length() != 3 ) {
+               // had a comma but this is not "1,[000]", set the add flag and the new flag
+               addNumber = true;
+               newNumber = true;
+            } else {
+               // already have a number and this wasn't preceded by a new number, advance the end offset
+               numberEnd = wordEnd;
+            }
+            lastDigitCount = word.length();
+         } else if ( isComma ) {
+            if ( wasComma || haveDecimal ) {
+               // second comma is not part of the number, add the number and reset
+               addNumber = true;
+            }
+         } else if ( isDecimal ) {
+            if ( haveDecimal ) {
+               // second dot is not part of the number, add the number and reset
+               addNumber = true;
+            } else if ( haveComma && lastDigitCount != 3 ) {
+               // had a comma but this is not "1,[000].", set the add flag and reset
+               addNumber = true;
+            }
+         } else {
+            if ( haveNumber ) {
+               // non-number related, add the number and reset
+               addNumber = true;
+            }
+         }
+         if ( addNumber ) {
+            // numberEnd is exclusive for substring, the span is given an inclusive end offset
+            final RxNumber number = new RxNumber( rxNorm.substring( numberBegin, numberEnd ).trim() );
+            textSpans.add( new AttributeTextSpan( number, numberBegin, numberEnd - 1 ) );
+            haveNumber = false;
+         }
+         // comma and decimal state is only kept while a number is pending, so emitting a number clears it
+         wasOnlyDigits = isOnlyDigits;
+         wasComma = haveNumber && isComma;
+         haveComma = haveNumber && (isComma || haveComma);
+         haveDecimal = haveNumber && (isDecimal || haveDecimal);
+         if ( newNumber ) {
+            // done after the emit above so that a number can be closed and the next one opened by the same word
+            haveNumber = true;
+            numberBegin = wordBegin;
+            numberEnd = wordEnd;
+         }
+         // advance to the next word, skipping the single separating space
+         wordBegin = wordEnd + 1;
+         if ( wordBegin >= rxNorm.length() ) {
+            break;
+         }
+         wordEnd = rxNorm.indexOf( ' ', wordBegin );
+         if ( wordEnd < 0 ) {
+            wordEnd = rxNorm.length();
+         }
+      }
+      return textSpans;
+   }
+
+
+   static private Collection<AttributeTextSpan> getRxUnitAttributeTextSpans( final String word ) {
+      final Collection<AttributeTextSpan> textSpans = new HashSet<>();
+      for ( RxAttribute rxAttribute : FactorNumber.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Mass.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Volume.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Rads.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : ConcUnit.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : TimeUnit.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Percent.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : StrengthUnit.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : UnitMisc.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      for ( RxAttribute rxAttribute : Coding.values() ) {
+         textSpans.addAll( getAttributeTextSpans( word, rxAttribute ) );
+      }
+      return textSpans;
+   }
+
+
+   /**
+    * Builds compound "unit per unit" spans from already located units and per-dose dividers.
+    * <p/>
+    * A compound unit is a unit, then a divider beginning two characters after the unit's (inclusive) end,
+    * then another unit beginning two characters after the divider's (inclusive) end - i.e. each separated
+    * by exactly one character.
+    *
+    * @param rxNorm   text in which the spans were found
+    * @param perDoses spans of per-dose dividers
+    * @param rxUnits  spans of simple units
+    * @return spans covering first unit through second unit, each carrying the covered text; empty if there are
+    * fewer than two units or no dividers
+    */
+   static private Collection<AttributeTextSpan> getComplexUnitTextSpans( final String rxNorm,
+                                                                         final Collection<AttributeTextSpan> perDoses,
+                                                                         final Collection<AttributeTextSpan> rxUnits ) {
+      final boolean enoughUnits = rxUnits.size() >= 2;
+      if ( !enoughUnits || perDoses.isEmpty() ) {
+         return Collections.emptySet();
+      }
+      final Collection<AttributeTextSpan> found = new HashSet<>();
+      for ( AttributeTextSpan firstUnit : rxUnits ) {
+         final int dividerBegin = firstUnit.getEnd() + 2;
+         for ( AttributeTextSpan divider : perDoses ) {
+            if ( divider.getBegin() != dividerBegin ) {
+               continue;
+            }
+            final int secondUnitBegin = divider.getEnd() + 2;
+            for ( AttributeTextSpan secondUnit : rxUnits ) {
+               if ( secondUnit.getBegin() != secondUnitBegin ) {
+                  continue;
+               }
+               final int begin = firstUnit.getBegin();
+               final int end = secondUnit.getEnd();
+               final String coveredText = rxNorm.substring( begin, end + 1 );
+               found.add( new AttributeTextSpan( new RxComplexUnit( coveredText ), begin, end ) );
+            }
+         }
+      }
+      return found;
+   }
+
+
+   /**
+    * Builds dosage spans by pairing each number with the unit that begins two characters after the number's
+    * (inclusive) end.  Compound units take precedence: simple units are only considered for a number when no
+    * compound unit follows it.
+    *
+    * @param rxNorm         text in which the spans were found
+    * @param numbers        spans of numbers
+    * @param rxUnits        spans of simple units
+    * @param rxComplexUnits spans of compound units
+    * @return spans covering number through unit, each carrying the covered text; empty if there are no numbers
+    * or no units of either kind
+    */
+   static private Collection<AttributeTextSpan> getDosageAttributeTextSpans( final String rxNorm,
+                                                                             final Collection<AttributeTextSpan> numbers,
+                                                                             final Collection<AttributeTextSpan> rxUnits,
+                                                                             final Collection<AttributeTextSpan> rxComplexUnits ) {
+      // A dosage needs both a number and some kind of unit
+      final boolean noUnits = rxUnits.isEmpty() && rxComplexUnits.isEmpty();
+      if ( numbers.isEmpty() || noUnits ) {
+         return Collections.emptySet();
+      }
+      final Collection<AttributeTextSpan> doses = new HashSet<>();
+      final int textLength = rxNorm.length();
+      for ( AttributeTextSpan number : numbers ) {
+         final int unitBegin = number.getEnd() + 2;
+         if ( unitBegin >= textLength ) {
+            // no room left in the text for a unit after this number
+            continue;
+         }
+         boolean matchedComplex = false;
+         for ( AttributeTextSpan complexUnit : rxComplexUnits ) {
+            if ( complexUnit.getBegin() != unitBegin ) {
+               continue;
+            }
+            final String doseText = rxNorm.substring( number.getBegin(), complexUnit.getEnd() + 1 );
+            doses.add( new AttributeTextSpan( new RxDose( doseText ), number.getBegin(), complexUnit.getEnd() ) );
+            matchedComplex = true;
+         }
+         if ( !matchedComplex ) {
+            for ( AttributeTextSpan unit : rxUnits ) {
+               if ( unit.getBegin() != unitBegin ) {
+                  continue;
+               }
+               final String doseText = rxNorm.substring( number.getBegin(), unit.getEnd() + 1 );
+               doses.add( new AttributeTextSpan( new RxDose( doseText ), number.getBegin(), unit.getEnd() ) );
+            }
+         }
+      }
+      return doses;
+   }
+
+
+   // TODO
+   //   static private Collection<AttributeTextSpan> getRatioAttributeTextSpansNew( final String rxNorm,
+   //                                                                            final Collection<AttributeTextSpan> numbers,
+   //                                                                            final Collection<AttributeTextSpan> dosePers ) {
+   //      if ( numbers.size() < 2 || dosePers.isEmpty() ) {
+   //         return Collections.emptySet();
+   //      }
+   //      final List<AttributeTextSpan> numberList = new ArrayList( numbers );
+   //      Collections.sort( numberList, TextSpanComparator.getInstance() );
+   //      int wordBegin = -1;
+   //      int perEnd = -1;
+   //      int ratioBegin = -1;
+   //      for ( AttributeTextSpan number : numberList ) {
+   //         if ( wordBegin >= 0 && number.getBegin() == perEnd+2 ) {
+   //            // have ratio
+   //            ratioBegin = wordBegin;
+   //         }
+   //         perEnd = -1;
+   //         for ( AttributeTextSpan dosePer : dosePers ) {
+   //            if ( dosePer.getBegin() == number.getEnd()+2 ) {
+   //               if ( wordBegin < 0 ) {
+   //                  // startRatio
+   //                  wordBegin = number.getBegin();
+   //                  perEnd = dosePer.getEnd();
+   //               }
+   //            }
+   //         }
+   //      }
+   //
+   //   }
+
+   /**
+    * Finds the first ratio in text whose tokens are separated by single spaces.
+    * <p/>
+    * A ratio is a number, a divider word, then another number.  Digit words are recognized by
+    * {@code isOnlyDigits}, divider words by {@code isOnlyDivider} (terms of DosePer) and the separators allowed
+    * inside a number by {@code isOnlyDot} (terms of Dot).  Once a ratio exists, further dot or divider
+    * continuations extend it; scanning stops at the first word that cannot continue it, so at most one ratio
+    * is returned.
+    *
+    * @param rxNorm text to scan; words are taken to be the substrings between single space characters
+    * @return a set holding the single ratio span found (inclusive end offset, attribute carrying the trimmed
+    * ratio text), or an empty set if the text has only one word or contains no ratio
+    */
+   static private Collection<AttributeTextSpan> getRatioAttributeTextSpans( final String rxNorm ) {
+      int wordEnd = rxNorm.indexOf( ' ' );
+      if ( wordEnd < 0 ) {
+         // only one word in the term, can't be a ratio
+         return Collections.emptySet();
+      }
+      // [ratioBegin, ratioEnd) is the candidate ratio; ratioEnd is exclusive until the span is created
+      int ratioBegin = -1;
+      int ratioEnd = -1;
+      int wordBegin = 0;
+      // haveNumber: a first number has been seen;  haveRatio: number-divider-number has been completed
+      boolean haveNumber = false;
+      boolean haveRatio = false;
+      // classification of the previous word
+      boolean wasDigit = false;
+      boolean wasDot = false;
+      boolean wasDivider = false;
+      while ( wordBegin >= 0 ) {
+         final String word = rxNorm.substring( wordBegin, wordEnd );
+         final boolean isDigit = isOnlyDigits( word );
+         final boolean isDot = isOnlyDot( word );
+         final boolean isDivider = isOnlyDivider( word );
+         if ( isDigit ) {
+            if ( wasDigit && haveRatio ) {
+               // second number in a row, have a ratio
+               break;
+            } else if ( wasDot && haveNumber ) {
+               // either a comma or decimal was the last character
+               ratioEnd = wordEnd;
+            } else if ( wasDivider && haveNumber ) {
+               // number followed by a divider followed by this number, have a ratio
+               haveRatio = true;
+               ratioEnd = wordEnd;
+            } else {
+               // number preceded by something non-ratio related, reset
+               haveNumber = true;
+               // Possibly the first number in a ratio
+               ratioBegin = wordBegin;
+            }
+         } else if ( isDot ) {
+            if ( wasDigit ) {
+               // either a comma or decimal in a number
+               // do nothing
+            } else if ( wasDot ) {
+               if ( haveRatio ) {
+                  // two dots in a row, have a ratio
+                  break;
+               }
+               // two dots in a row, reset
+               haveNumber = false;
+               ratioBegin = -1;
+               ratioEnd = -1;
+            } else if ( wasDivider ) {
+               if ( haveRatio ) {
+                  // divider then dot, have a ratio
+                  break;
+               }
+               // divider then dot, reset
+               haveNumber = false;
+               ratioBegin = -1;
+               ratioEnd = -1;
+            }
+            // dot preceded by something non-ratio related requires no attention as ratio was not started
+         } else if ( isDivider ) {
+            if ( wasDigit ) {
+               // possibly divider in ratio, do nothing
+            } else if ( wasDot ) {
+               if ( haveRatio ) {
+                  // dot then divider, have a ratio
+                  break;
+               }
+               // dot then divider, reset
+               haveNumber = false;
+               ratioBegin = -1;
+               ratioEnd = -1;
+            } else if ( wasDivider ) {
+               if ( haveRatio ) {
+                  // two dividers in a row, have a ratio
+                  break;
+               }
+               // two dividers in a row, reset
+               haveNumber = false;
+               ratioBegin = -1;
+               ratioEnd = -1;
+            }
+            // divider preceded by something non-ratio related requires no attention as ratio was not started
+         } else {
+            if ( haveRatio ) {
+               // non-ratio related, have a ratio
+               break;
+            }
+            // non-ratio related, reset
+            haveNumber = false;
+            ratioBegin = -1;
+            ratioEnd = -1;
+         }
+         wasDigit = isDigit;
+         wasDot = isDot;
+         wasDivider = isDivider;
+         // advance to the next word, skipping the single separating space
+         wordBegin = wordEnd + 1;
+         if ( wordBegin >= rxNorm.length() ) {
+            break;
+         }
+         wordEnd = rxNorm.indexOf( ' ', wordBegin );
+         if ( wordEnd < 0 ) {
+            wordEnd = rxNorm.length();
+         }
+      }
+      if ( haveRatio ) {
+         // ratioEnd is exclusive for substring, the span is given an inclusive end offset
+         final Collection<AttributeTextSpan> ratioSpans = new HashSet<>( 1 );
+         final RxRatio rxRatio = new RxRatio( rxNorm.substring( ratioBegin, ratioEnd ).trim() );
+         final AttributeTextSpan attributeTextSpan = new AttributeTextSpan( rxRatio, ratioBegin, ratioEnd - 1 );
+         ratioSpans.add( attributeTextSpan );
+         return ratioSpans;
+      }
+      return Collections.emptySet();
+   }
+
+
+   /**
+    * Mass-related unit terms.
+    * Every term handed to a constant is stored in both its plural (term + "s") and given form,
+    * as produced by {@code pluralize}.
+    */
+   public enum Mass implements RxAttribute, RxUnit {
+      KILOGRAM( "kilogram" ),
+      GRAM( "gram", "gm", "g" ),
+      MILLI_EQUIVALENT( "milliequivalent", "meq" ),
+      GRAINS( "grain", "gr" ),
+      MILLIGRAM( "milligram", "mg" ),
+      MICROGRAM( "microgram", "mcg", "ug" ),
+      WET_WEIGHT( "wet weight", "ww" ),
+      POUND( "pound", "lb" );  // household measurement
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      Mass( final String... singularTerms ) {
+         __terms = pluralize( singularTerms );
+      }
+
+      /**
+       * @return the plural and given forms of every term for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Volume-related unit terms.
+    * Every term handed to a constant is stored in both its plural (term + "s") and given form,
+    * as produced by {@code pluralize}; therefore only singular forms are listed here.
+    */
+   static public enum Volume implements RxAttribute, RxUnit {
+      LITRE( "liter", "litre", "l" ),
+      MILLILITRE( "milliliter", "millilitre", "ml" ),
+      // Both spellings, singular only: listing "microlitres" explicitly made pluralize emit "microlitress"
+      MICROLITRE( "microliter", "microlitre" ),
+      CUBIC_CM( "cc" ),
+      CUP( "cup" ),   // household measurement
+      TABLESPOON( "tablespoon", "tbsp" ),  // household measurement
+      TEASPOON( "teaspoon", "tsp" ),    //household measurement
+      SCOOP( "scoopful", "scoop" ),   // household measurement
+      OUNCE( "ounce", "oz" ),     // household measurement
+      DROP( "drop", "guttae", "gutta", "gtt" ),
+      LF( "lf" );  // What is lf ?
+      final private String[] __terms;
+
+      private Volume( final String... terms ) {
+         __terms = pluralize( terms );
+      }
+
+      /**
+       * @return the plural and given forms of every term for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Unit terms grouped under the Rads attribute: curie, millicurie and atomic unit.
+    * Every term is stored in both its plural (term + "s") and given form.
+    */
+   public enum Rads implements RxAttribute, RxUnit {
+      CURIE( "curie" ),
+      MILLICURIE( "millicurie", "mci" ),
+      ATOMIC_UNIT( "atomic unit", "au" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      Rads( final String... singularTerms ) {
+         __terms = pluralize( singularTerms );
+      }
+
+      /**
+       * @return the plural and given forms of every term for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Concentration-style unit terms.
+    * Every term is stored in both its plural (term + "s") and given form.
+    * NOTE(review): "mic" is listed for both MILLIMOL and MINIMUM_INHIBITORY - confirm which is intended.
+    */
+   public enum ConcUnit implements RxAttribute, RxUnit {
+      MOL( "mol", "mole" ), // not really mass or volume
+      MILLIMOL( "millimol", "millimolar", "mic", "mil" ),
+      MINIMUM_INHIBITORY( "minimum inhibitory concentration", "minimal inhibitory concentration", "mic" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      ConcUnit( final String... singularTerms ) {
+         __terms = pluralize( singularTerms );
+      }
+
+      /**
+       * @return the plural and given forms of every term for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Time-related unit terms: month, week, day and hour.
+    * Every term is stored in both its plural (term + "s") and given form.
+    */
+   public enum TimeUnit implements RxAttribute, RxUnit {
+      MONTH( "month" ),
+      WEEK( "week" ),
+      DAY( "day" ),
+      HOUR( "hour", "hr" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      TimeUnit( final String... singularTerms ) {
+         __terms = pluralize( singularTerms );
+      }
+
+      /**
+       * @return the plural and given forms of every term for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   /**
+    * Percent terms.  Unlike most unit enums here, the terms are stored exactly as given (no plural forms).
+    */
+   public enum Percent implements RxAttribute, RxUnit {
+      PERCENT( "percent", "%", "p" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      Percent( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Strength-related unit terms: international unit, usp unit and gauge.
+    * Every term is stored in both its plural (term + "s") and given form.
+    */
+   public enum StrengthUnit implements RxAttribute, RxUnit {
+      INTERNATIONAL_UNIT( "international unit", "iu" ),
+      USP_UNIT( "usp ' u" ),
+      GAUGE( "gauge" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      StrengthUnit( final String... singularTerms ) {
+         __terms = pluralize( singularTerms );
+      }
+
+      /**
+       * @return the plural and given forms of every term for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   /**
+    * Miscellaneous unit terms that do not belong to the other unit enums.
+    * Every term is stored in both its plural (term + "s") and given form.
+    * NOTE(review): "bau" is listed for both ALLERGY and ANAESTHESIA - confirm that the overlap is intended.
+    */
+   public enum UnitMisc implements RxAttribute, RxUnit {
+      UNIT( "unit", "unt", "u", "( + / - ) unit", "( + / - ) u" ),
+      MILLION_UNIT( "mu", "mmu" ),
+      CALIBRATION_UNIT( "cu" ),
+      TEST_UNIT( "tu / test" ),
+      COUNT( "count" ),
+      NORMAL_SALINE( "normal saline", "ns", "normal saline ns" ),
+      ACTUATION( "actuation", "actuat", "acutation", "inh" ),
+      ALLERGY( "bioequivalent allergy unit", "bau" ),
+      ANAESTHESIA( "base anaesthesia unit", "base anesthesia unit", "bau" ),
+      WHAT_WV( "wv", "vv" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      UnitMisc( final String... singularTerms ) {
+         __terms = pluralize( singularTerms );
+      }
+
+      /**
+       * @return the plural and given forms of every term for this constant (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Coding terms "pnu" and "mmo".  The terms are stored exactly as given (no plural forms).
+    */
+   public enum Coding implements RxAttribute, RxUnit {
+      PNU( "pnu" ),
+      MMO( "mmo" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      Coding( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   static private String[] pluralize( final String... terms ) {
+      List<String> allTerms = new ArrayList<>();
+      for ( String term : terms ) {
+         allTerms.add( term + "s" );
+         allTerms.add( term );
+      }
+      return allTerms.toArray( new String[allTerms.size()] );
+   }
+
+   /**
+    * Divider terms: "per", slash, dash and colon.  The terms are stored exactly as given.
+    */
+   public enum DosePer implements RxAttribute, Punctuation {
+      PER( "per" ),
+      SLASH( "/" ),
+      DASH( "-" ),
+      COLON( ":" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      DosePer( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   /**
+    * Conjunction-like words and symbols.  The terms are stored exactly as given.
+    */
+   public enum Conjunction implements RxAttribute, Punctuation {
+      COMMA( "," ),
+      SEMICOLON( ";" ),
+      PLUS( "+", "plus" ),
+      FOR( "for" ),
+      THROUGH( "by", "through" ),
+      IN( "in" ),
+      AS( "as" ),
+      AND( "and", "et", "&" ),
+      OR( "or" ),
+      WITH( "with", "w /", "cum" ),
+      WITHOUT( "without", "w / o", "w - o", "sine" );  // also s
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      Conjunction( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Opening and closing round parenthesis terms.  The terms are stored exactly as given.
+    */
+   public enum Parenthesis implements RxAttribute, Punctuation {
+      START( "(" ),
+      END( ")" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      Parenthesis( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Bracket / parenthesis fragments: a lone "(", "( + / - )", and empty or near-empty bracket pairs.
+    * The terms are stored exactly as given.
+    */
+   public enum BadEndParenthesis implements RxAttribute, Punctuation {
+      START( "(" ),
+      PLUS_MINUS( "( + / - )" ),
+      BOTH( "( )", "[ ]", "( < )", "( . )", "<" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      BadEndParenthesis( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Dot and comma terms.  The terms are stored exactly as given.
+    */
+   public enum Dot implements RxAttribute, Punctuation {
+      DOT( "." ),
+      COMMA( "," );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      Dot( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+   /**
+    * Terms collected under the "unwanted" delivery attribute.  The terms are stored exactly as given.
+    */
+   public enum DeliveryUnwanted implements RxAttribute {
+      MEDICINE( "medicine" ),
+      MISCELLANEOUS( "miscellaneous", "misc", "miscell" ),
+      UNIDENTIFIED( "unidentified", "unspecified" ),
+      OBSOLETE( "obsolete" ),
+      NOT_USED( "not used" ),
+      OTHER( "other" ),
+      NOT_APPLICABLE( "not applicable" ),
+      COMBINATION( "combination" ),
+      DIST_WATER( "distilled water", "d . w ." ),
+      WATER( "water", "aq", "aqua" ),
+      PRODUCT( "product" ),
+      SUCH( "such", "tal", "talus" ),
+      MAKE( "make", "let it be made", "ft .", "fiat" );
+
+      private final String[] __terms;
+
+      // enum constructors are implicitly private
+      DeliveryUnwanted( final String... exactTerms ) {
+         __terms = exactTerms;
+      }
+
+      /**
+       * @return the terms for this constant, exactly as declared (the internal array, not a copy)
+       */
+      public String[] getTerms() {
+         return __terms;
+      }
+   }
+
+
+   /**
+    * A text span that also remembers the runtime class of the RxAttribute it was created for.
+    */
+   private static class AttributeTextSpan extends TextSpan {
+      // TODO - change to class, not entire collection of terms
+      private final Class<? extends RxAttribute> __rxAttributeClass;
+
+      /**
+       * @param rxAttribute attribute whose runtime class is recorded
+       * @param begin       begin offset handed to the TextSpan constructor
+       * @param end         end offset handed to the TextSpan constructor
+       */
+      private AttributeTextSpan( final RxAttribute rxAttribute, final int begin, final int end ) {
+         super( begin, end );
+         __rxAttributeClass = rxAttribute.getClass();
+      }
+
+      /**
+       * @return the runtime class of the attribute given at construction
+       */
+      public final Class<? extends RxAttribute> getRxAttributeClass() {
+         return __rxAttributeClass;
+      }
+
+      /**
+       * @param rxAttributeClass candidate class or interface
+       * @return true if the recorded attribute class is the candidate or a subtype of it
+       */
+      public final boolean isRxAttributeClass( final Class<? extends RxAttribute> rxAttributeClass ) {
+         final Class<? extends RxAttribute> recorded = __rxAttributeClass;
+         return rxAttributeClass.isAssignableFrom( recorded );
+      }
+   }
+
+   /**
+    * An immutable pair of begin and end offsets.
+    * Two spans are equal when both offsets are equal.
+    */
+   static private class TextSpan {
+      final private int __begin;
+      final private int __end;
+
+      private TextSpan( final int begin, final int end ) {
+         __begin = begin;
+         __end = end;
+      }
+
+      /**
+       * @return the begin offset
+       */
+      final public int getBegin() {
+         return __begin;
+      }
+
+      /**
+       * @return the end offset
+       */
+      final public int getEnd() {
+         return __end;
+      }
+
+      /**
+       * @param textSpan span to test
+       * @return true if the given span lies within this span; equal bounds count as contained
+       */
+      final public boolean contains( final TextSpan textSpan ) {
+         return __begin <= textSpan.__begin && __end >= textSpan.__end;
+      }
+
+      /**
+       * @param textSpan other span
+       * @return 0 if either span contains the other, otherwise the larger begin minus the smaller end
+       */
+      final public int distance( final TextSpan textSpan ) {
+         if ( contains( textSpan ) || textSpan.contains( this ) ) {
+            return 0;
+         }
+         // Will be negative when the text spans overlap
+         return Math.max( __begin, textSpan.__begin ) - Math.min( __end, textSpan.__end );
+      }
+
+      /**
+       * Hash value left unchanged; it may collide for distinct spans, which is why equals must not rely on it.
+       */
+      @Override
+      public int hashCode() {
+         return __end * 10 + __begin;
+      }
+
+      /**
+       * Compares the actual offsets.  Comparing hash codes is not sufficient:
+       * spans (begin=10, end=0) and (begin=0, end=1) both hash to 10 but are different spans.
+       */
+      @Override
+      public boolean equals( final Object that ) {
+         if ( this == that ) {
+            return true;
+         }
+         if ( !(that instanceof TextSpan) ) {
+            return false;
+         }
+         final TextSpan other = (TextSpan)that;
+         return __begin == other.__begin && __end == other.__end;
+      }
+   }
+
+   //   static private enum TextSpanComparator implements Comparator<TextSpan> {
+   //      INSTANCE;
+   //      static public TextSpanComparator getInstance() {
+   //         return INSTANCE;
+   //      }
+   //      public int compare( final TextSpan textSpan1, final TextSpan textSpan2 ) {
+   //         final int beginOff = textSpan1.__begin - textSpan2.__begin;
+   //         if ( beginOff != 0 ) {
+   //            return beginOff;
+   //         }
+   //         return textSpan1.__end - textSpan2.__end;
+   //      }
+   //   }
+
+}

Propchange: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DeliveryUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DoseUtil.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DoseUtil.java?rev=1624062&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DoseUtil.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DoseUtil.java Wed Sep 10 17:30:42 2014
@@ -0,0 +1,251 @@
+package org.apache.ctakes.dictionarytool.util;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 7/14/14
+ */
+final public class DoseUtil {
+
+   private DoseUtil() {
+   }
+
+   static private final String[] NUMBER_WORD = {"billion", "million", "thousand", "hundred"};
+
+   // some of these are not strictly units, e.g. "ud" : "ut dictum" or "as directed"
+   // but can be properly trimmed as they appear in the same place as would a unit
+
+   static private final String[] UNITS = {"%", "%ww", "%vv", "%wv", "percent",
+                                          "g", "gr", "gm", "gram",
+                                          "mg", "milligram",
+                                          "microgram", "micrograms", "mcg", "ug",
+                                          "millicurie", "mic", "oz",
+                                          "l", "lf", "ml", "milliliter", "milliequivalent",
+                                          "unt", "meq", "hr", "day", "days", "weekly", "biweekly",
+                                          "usp", "titradose", "unit", "units", "iu", "u", "mmu",
+                                          "mm", "cm", "[iu]", "[usp'u]", "[usp,u]", "gauge", "intl", "[pnu]",
+                                          "au", "[au]", "bau", "[bau]", "mci", "ud",
+                                          "% ww", "% vv", "% wv",
+                                          "[ iu ]", "[ usp ' u ]", "[ usp , u ]", "[ pnu ]",
+                                          "[ au ]", "[ bau ]"};
+
+   static private final String[] UNIT_PRE = {"%/", "%-", "mg/", "mg-", "milligram/",
+                                             "microgram/", "micrograms/", "mcg/", "mcg-",
+                                             "g/", "g-", "gm/", "gm-", "gram/", "mmo/", "cu/",
+                                             "ml/", "ml-", "milliliter/", "l/", "meq/",
+                                             "unt/", "u/", "u-", "unit/", "unit-", "units/", "units-",
+                                             "au/", "bau/", "mci/",
+                                             "% /", "% -", "mg /", "mg -", "milligram /",
+                                             "microgram /", "micrograms /", "mcg /", "mcg -",
+                                             "g /", "g -", "gm /", "gm -", "gram /", "mmo /", "cu /",
+                                             "ml /", "ml -", "milliliter /", "l /", "meq /",
+                                             "unt /", "u /", "u -", "unit /", "unit -", "units /", "units -",
+                                             "au /", "bau /", "mci /"};
+
+   static private final String[] UNIT_POST = {"/%", "/p",
+                                              "/g", "/gm", "/gram", "/mg", "/milligram",
+                                              "/microgram", "/micrograms", "/mcg",
+                                              "/ml", "/milliliter", "/hr", "/day", "/24hr", "/l",
+                                              "/actuat", "/actuation", "/vial", "/vil", "/pkt", "1dose",
+                                              "/ %", "/ p",
+                                              "/ g", "/ gm", "/ gram", "/ mg", "/ milligram",
+                                              "/ microgram", "/ micrograms", "/ mcg",
+                                              "/ ml", "/ milliliter", "/ hr", "/ day", "/ 24hr", "/ l",
+                                              "/ actuat", "/ actuation", "/ vial", "/ vil", "/ pkt"};
+
+
+   //   static private Set<String> getDoseFreeComboTerms( final String term ) {
+   //      final Set<String> termSet = new HashSet<String>();
+   //      final int inIndex = term.indexOf( " in " );
+   //      final int andIndex = term.indexOf( " and " );
+   //      final int withIndex = term.indexOf( " w/ " );
+   //      if ( inIndex > 0 ) {
+   //         final String doseFree1 = getDoseFreeTerm( term.substring( 0, inIndex ) );
+   //         final Set<String> doseFree2 = getDoseFreeComboTerms( term.substring( inIndex+4 ) );
+   //         termSet.add( doseFree1 );
+   //         for ( String doseFreeTerm : doseFree2 ) {
+   //            termSet.add( doseFree1 + " in " + doseFreeTerm );
+   //         }
+   //      } else if ( andIndex > 0 ) {
+   //         final String doseFree1 = getDoseFreeTerm( term.substring( 0, andIndex ) );
+   //         final Set<String> doseFree2 = getDoseFreeComboTerms( term.substring( andIndex+5 ) );
+   //         termSet.add( doseFree1 );
+   //         for ( String doseFreeTerm : doseFree2 ) {
+   //            termSet.add( doseFree1 + " and " + doseFreeTerm );
+   //         }
+   //      } else if (withIndex > 0 ) {
+   //         final String doseFree1 = getDoseFreeTerm( term.substring( 0, withIndex ) );
+   //         final Set<String> doseFree2 = getDoseFreeComboTerms( term.substring( withIndex+4 ) );
+   //         termSet.add( doseFree1 );
+   //         for ( String doseFreeTerm : doseFree2 ) {
+   //            termSet.add( doseFree1 + " w/ " + doseFreeTerm );
+   //         }
+   //      } else {
+   //         termSet.add( getDoseFreeTerm( term ) );
+   //      }
+   //      return termSet;
+   //   }
+
+   static public String getDoseFreeText( final String term ) {
+      // check for (and substring) things like "aspirin 2 mg" in rxnorm term - just want "aspirin"
+      // Anything with a number that is required "superdrug mark 3" will be added by the full orangebook name
+      // though different doses can have different cuis, keeping the doses can prevent the drug from being captured
+      // and the drug is more important than its dose
+      // (which rarely follows the rxnorm format, can be determined later, etc.)
+
+      // TODO Can't rely on splits as the term has been tokenized
+      // TODO Detokenize?       Or just use contains() instead of equals, etc.
+      // TODO Make a pattern builder for "( wwww )" vs. "wwww" vs ", wwww", etc?
+
+      final String[] splits = term.split( "\\s+" );
+      if ( splits.length <= 1 ) {
+         return term;
+      }
+      boolean skipMode = false;
+      final StringBuilder sb = new StringBuilder();
+      for ( int i = 0; i < splits.length; i++ ) {
+         if ( splits[i].isEmpty() ) {
+            continue;
+         }
+         if ( skipMode ) {
+            if ( isFullyUnit( splits[i] ) || splits[i].equals( "per" ) || isNumberWord( splits[i] ) ) {
+               continue;
+            }
+         }
+         skipMode = false;
+         if ( Character.isDigit( splits[i].charAt( 0 ) ) || splits[i].charAt( 0 ) == '.' ) {
+            if ( splits.length > i + 1
+                  && (isFullyUnit( splits[i + 1] ) || isNumberWord( splits[i + 1] ))
+                  && isNumber( splits[i] ) ) {
+               skipMode = true;
+               i++;
+               continue;
+            }
+            if ( hasPostUnit( splits[i] ) || endsWithUnit( splits[i] ) ) {
+               skipMode = true;
+               continue;
+            }
+            if ( isDimension( splits[i] ) ) {
+               skipMode = true;
+               continue;
+            }
+         }
+         sb.append( " " ).append( splits[i] );
+      }
+      return sb.toString().trim();
+   }
+
+
+   static private boolean isNumber( final String text ) {
+      try {
+         Float.parseFloat( text );
+         return true;
+      } catch ( NumberFormatException nfE ) {
+         if ( Character.isDigit( text.charAt( text.length() - 1 ) ) ) {
+            if ( text.contains( "," ) ) {
+               return isNumber( text.replaceAll( ",", "" ) );
+            } // trim for poorly formed dosages like "500mg/5 ml" or "5%-100 mg/100 ml"
+            for ( String preUnit : UNIT_PRE ) {
+               if ( text.contains( preUnit ) ) {
+                  return isNumber( text.replaceAll( preUnit, "" ) );
+               }
+            }
+            if ( text.contains( "-" ) ) {
+               return isNumber( text.replaceAll( "-", "" ) );
+            }
+         }
+         // keep going
+      }
+      return false;
+   }
+
+   static private boolean isNumberWord( final String text ) {
+      for ( String word : NUMBER_WORD ) {
+         if ( text.equals( word ) ) {
+            return true;
+         }
+      }
+      return false;
+   }
+
+   static private boolean isFullyUnit( final String text ) {
+      return hasPreUnit( text ) || hasPostUnit( text ) || isUnit( text );
+   }
+
+   static private boolean hasUnit( final String text ) {
+      final String unit = getUnit( text );
+      return !unit.isEmpty()
+            && (text.length() == unit.length() || Character.isDigit( text.charAt(
+            text.length() - unit.length() - 1 ) ));
+   }
+
+   static private boolean isUnit( final String text ) {
+      final String unit = getUnit( text );
+      return !unit.isEmpty();
+   }
+
+   static private boolean endsWithUnit( final String text ) {
+      final String unit = getUnit( text );
+      return !unit.isEmpty() && text.length() > unit.length() && Character.isDigit( text.charAt(
+            text.length() - unit.length() - 1 ) );
+   }
+
+   static private String getUnit( final String text ) {
+      final String commaUnit = getUnit( text, ',' );
+      if ( !commaUnit.isEmpty() ) {
+         return commaUnit;
+      }
+      final String semiUnit = getUnit( text, ';' );
+      if ( !semiUnit.isEmpty() ) {
+         return semiUnit;
+      }
+      String longestUnit = "";
+      for ( String unit : UNITS ) {
+         if ( unit.length() > longestUnit.length() && text.endsWith( unit ) ) {
+            longestUnit = unit;
+         }
+      }
+      return longestUnit;
+   }
+
+   static private String getUnit( final String text, final char postPunct ) {
+      if ( text.charAt( text.length() - 1 ) != postPunct ) {
+         return "";
+      }
+      String longestUnit = "";
+      for ( String unit : UNITS ) {
+         if ( unit.length() > longestUnit.length()
+               && (text.endsWith( unit + postPunct ) || text.endsWith( unit + " " + postPunct )) ) {
+            longestUnit = unit;
+         }
+      }
+      if ( !longestUnit.isEmpty() ) {
+         return longestUnit + postPunct;
+      }
+      return "";
+   }
+
+   static private boolean hasPreUnit( final String text ) {
+      for ( String unitPre : UNIT_PRE ) {
+         if ( text.contains( unitPre ) ) {
+            return true;
+         }
+      }
+      return false;
+   }
+
+   static private boolean isDimension( final String text ) {
+      return text.endsWith( "\"" ) && (text.contains( "\"x" ) || text.contains( "\" x" ));
+   }
+
+   static private boolean hasPostUnit( final String text ) {
+      for ( String unitPost : UNIT_POST ) {
+         if ( text.endsWith( unitPost ) || text.endsWith( unitPost + "," ) || text.endsWith( unitPost + " ," ) ) {
+            return true;
+         }
+      }
+      return false;
+   }
+
+
+}

Propchange: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/DoseUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message