lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From no...@apache.org
Subject [05/50] [abbrv] lucene-solr:apiv2: SOLR-8903: Move SolrJ DateUtil to contrib/extraction as ExtractionDateUtil. And removed obsolete methods.
Date Thu, 07 Apr 2016 20:29:37 GMT
SOLR-8903: Move SolrJ DateUtil to contrib/extraction as ExtractionDateUtil.
And removed obsolete methods.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5e5fd662
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5e5fd662
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5e5fd662

Branch: refs/heads/apiv2
Commit: 5e5fd662575105de88d8514b426bccdcb4c76948
Parents: 39932f5
Author: David Smiley <dsmiley@apache.org>
Authored: Wed Mar 30 15:00:29 2016 -0400
Committer: David Smiley <dsmiley@apache.org>
Committed: Wed Mar 30 15:00:29 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   6 +
 .../extraction/ExtractingRequestHandler.java    |  23 +-
 .../handler/extraction/ExtractionDateUtil.java  | 178 +++++++++++++
 .../handler/extraction/SolrContentHandler.java  |  12 +-
 .../extraction/TestExtractionDateUtil.java      |  61 +++++
 .../solr/morphlines/cell/SolrCellBuilder.java   |  18 +-
 .../morphlines/cell/SolrCellMorphlineTest.java  |   4 +-
 .../org/apache/solr/common/util/DateUtil.java   | 259 -------------------
 .../apache/solr/common/util/TestDateUtil.java   |  35 ---
 9 files changed, 270 insertions(+), 326 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 875b720..8a0b866 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -180,6 +180,9 @@ Upgrading from Solr 5.x
   When there is a non-zero number of milliseconds, it is padded with zeros to 3 digits. Negative
year (BC) dates are
   now possible.  Parsing: It is now an error to supply a portion of the date out of its,
range, like 67 seconds.
 
+* SolrJ no longer includes DateUtil. If for some reason you need to format or parse dates,
simply use Instant.format()
+  and Instant.parse().
+
 Detailed Change List
 ----------------------
 
@@ -522,6 +525,9 @@ Other Changes
   now parse (and format) dates with a leading '+' or '-' (BC dates or dates > 4 digit
year.
   [value] and ms() and contrib/analytics now parse with date math. (David Smiley)
 
+* SOLR-8904: DateUtil in SolrJ moved to the extraction contrib as ExtractionDateUtil.  Obsolete
methods were removed.
+  (David Smiley)
+
 ==================  5.5.1 ==================
 
 Bug Fixes

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
index af70c62..82fe633 100644
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
@@ -17,31 +17,30 @@
 package org.apache.solr.handler.extraction;
 
 
+import java.io.File;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.util.DateUtil;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.ContentStreamHandlerBase;
+import org.apache.solr.handler.loader.ContentStreamLoader;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.security.AuthorizationContext;
 import org.apache.solr.security.PermissionNameProvider;
 import org.apache.solr.update.processor.UpdateRequestProcessor;
 import org.apache.solr.util.plugin.SolrCoreAware;
-import org.apache.solr.handler.ContentStreamHandlerBase;
-import org.apache.solr.handler.loader.ContentStreamLoader;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.mime.MimeTypeException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-
 
 /**
  * Handler for rich documents like PDF or Word or any other file format that Tika handles
that need the text to be extracted
@@ -59,7 +58,7 @@ public class ExtractingRequestHandler extends ContentStreamHandlerBase implement
   protected ParseContextConfig parseContextConfig;
 
 
-  protected Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
+  protected Collection<String> dateFormats = ExtractionDateUtil.DEFAULT_DATE_FORMATS;
   protected SolrContentHandlerFactory factory;
 
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
new file mode 100644
index 0000000..b7ccf82
--- /dev/null
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.extraction;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.TimeZone;
+
+
+/**
+ * This class has some code from HttpClient DateUtil.
+ */
+public class ExtractionDateUtil {
+  //start HttpClient
+  /**
+   * Date format pattern used to parse HTTP date headers in RFC 1123 format.
+   */
+  public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss zzz";
+
+  /**
+   * Date format pattern used to parse HTTP date headers in RFC 1036 format.
+   */
+  public static final String PATTERN_RFC1036 = "EEEE, dd-MMM-yy HH:mm:ss zzz";
+
+  /**
+   * Date format pattern used to parse HTTP date headers in ANSI C
+   * <code>asctime()</code> format.
+   */
+  public static final String PATTERN_ASCTIME = "EEE MMM d HH:mm:ss yyyy";
+  //These are included for back compat
+  private static final Collection<String> DEFAULT_HTTP_CLIENT_PATTERNS = Arrays.asList(
+          PATTERN_ASCTIME, PATTERN_RFC1036, PATTERN_RFC1123);
+
+  private static final Date DEFAULT_TWO_DIGIT_YEAR_START;
+
+  static {
+    Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
+    calendar.set(2000, Calendar.JANUARY, 1, 0, 0);
+    DEFAULT_TWO_DIGIT_YEAR_START = calendar.getTime();
+  }
+
+  private static final TimeZone GMT = TimeZone.getTimeZone("GMT");
+
+  //end HttpClient
+
+  //---------------------------------------------------------------------------------------
+
+  /**
+   * Differs by {@link DateTimeFormatter#ISO_INSTANT} in that it's lenient.
+   */
+  public static final DateTimeFormatter ISO_8601_PARSER = new DateTimeFormatterBuilder()
+      .parseCaseInsensitive().parseLenient().appendInstant().toFormatter(Locale.ROOT);
+
+  /**
+   * A suite of default date formats that can be parsed, and thus transformed to the Solr
specific format
+   */
+  public static final Collection<String> DEFAULT_DATE_FORMATS = new ArrayList<>();
+
+  static {
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss'Z'");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd hh:mm:ss");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd HH:mm:ss");
+    DEFAULT_DATE_FORMATS.add("EEE MMM d hh:mm:ss z yyyy");
+    DEFAULT_DATE_FORMATS.addAll(DEFAULT_HTTP_CLIENT_PATTERNS);
+  }
+
+  /**
+   * Returns a formatter that can be use by the current thread if needed to
+   * convert Date objects to the Internal representation.
+   *
+   * @param d The input date to parse
+   * @return The parsed {@link java.util.Date}
+   * @throws java.text.ParseException If the input can't be parsed
+   */
+  public static Date parseDate(String d) throws ParseException {
+    return parseDate(d, DEFAULT_DATE_FORMATS);
+  }
+
+  public static Date parseDate(String d, Collection<String> fmts) throws ParseException
{
+    if (d.length() > 0 && d.charAt(d.length() - 1) == 'Z') {
+      try {
+        return new Date(ISO_8601_PARSER.parse(d, Instant::from).toEpochMilli());
+      } catch (Exception e) {
+        //ignore; perhaps we can parse with one of the formats below...
+      }
+    }
+    return parseDate(d, fmts, null);
+  }
+
+  /**
+   * Slightly modified from org.apache.commons.httpclient.util.DateUtil.parseDate
+   * <p>
+   * Parses the date value using the given date formats.
+   *
+   * @param dateValue   the date value to parse
+   * @param dateFormats the date formats to use
+   * @param startDate   During parsing, two digit years will be placed in the range
+   *                    <code>startDate</code> to <code>startDate + 100
years</code>. This value may
+   *                    be <code>null</code>. When <code>null</code>
is given as a parameter, year
+   *                    <code>2000</code> will be used.
+   * @return the parsed date
+   * @throws ParseException if none of the dataFormats could parse the dateValue
+   */
+  public static Date parseDate(
+          String dateValue,
+          Collection<String> dateFormats,
+          Date startDate
+  ) throws ParseException {
+
+    if (dateValue == null) {
+      throw new IllegalArgumentException("dateValue is null");
+    }
+    if (dateFormats == null) {
+      dateFormats = DEFAULT_HTTP_CLIENT_PATTERNS;
+    }
+    if (startDate == null) {
+      startDate = DEFAULT_TWO_DIGIT_YEAR_START;
+    }
+    // trim single quotes around date if present
+    // see issue #5279
+    if (dateValue.length() > 1
+            && dateValue.startsWith("'")
+            && dateValue.endsWith("'")
+            ) {
+      dateValue = dateValue.substring(1, dateValue.length() - 1);
+    }
+
+    //TODO upgrade to Java 8 DateTimeFormatter. But how to deal with the GMT as a default?
+    SimpleDateFormat dateParser = null;
+    Iterator formatIter = dateFormats.iterator();
+
+    while (formatIter.hasNext()) {
+      String format = (String) formatIter.next();
+      if (dateParser == null) {
+        dateParser = new SimpleDateFormat(format, Locale.ENGLISH);
+        dateParser.setTimeZone(GMT);
+        dateParser.set2DigitYearStart(startDate);
+      } else {
+        dateParser.applyPattern(format);
+      }
+      try {
+        return dateParser.parse(dateValue);
+      } catch (ParseException pe) {
+        // ignore this exception, we will try the next format
+      }
+    }
+
+    // we were unable to parse the date
+    throw new ParseException("Unable to parse the date " + dateValue, 0);
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
index 442e64c..7779451 100644
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
@@ -17,7 +17,6 @@
 package org.apache.solr.handler.extraction;
 
 import java.lang.invoke.MethodHandles;
-import java.text.DateFormat;
 import java.util.ArrayDeque;
 import java.util.Collection;
 import java.util.Collections;
@@ -31,7 +30,6 @@ import java.util.Set;
 
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.DateUtil;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.schema.TrieDateField;
@@ -83,7 +81,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
   private Set<String> literalFieldNames = null;
   
   public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
-    this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
+    this(metadata, params, schema, ExtractionDateUtil.DEFAULT_DATE_FORMATS);
   }
 
 
@@ -317,7 +315,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
   /**
    * Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField}
    * <p>
-   * This implementation only formats dates using the {@link org.apache.solr.common.util.DateUtil}.
+   * This implementation only formats dates using the {@link ExtractionDateUtil}.
    *
    * @param val    The value to transform
    * @param schFld The {@link org.apache.solr.schema.SchemaField}
@@ -328,10 +326,8 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
     if (schFld != null && schFld.getType() instanceof TrieDateField) {
       //try to transform the date
       try {
-        Date date = DateUtil.parseDate(val, dateFormats);
-        DateFormat df = DateUtil.getThreadLocalDateFormat();
-        result = df.format(date);
-
+        Date date = ExtractionDateUtil.parseDate(val, dateFormats); // may throw
+        result = date.toInstant().toString();//ISO format
       } catch (Exception e) {
         // Let the specific fieldType handle errors
         // throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid value:
" + val + " for field: " + schFld, e);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
new file mode 100644
index 0000000..9632cb9
--- /dev/null
+++ b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.extraction;
+
+import java.text.ParseException;
+import java.util.Date;
+import java.util.Locale;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestExtractionDateUtil extends LuceneTestCase {
+
+  public void testISO8601() throws Exception {
+    // dates with atypical years
+    assertParseFormatEquals("0001-01-01T01:01:01Z", null);
+    assertParseFormatEquals("+12021-12-01T03:03:03Z", null);
+
+    assertParseFormatEquals("0000-04-04T04:04:04Z", null); // note: 0 AD is also known as
1 BC
+
+    // dates with negative years (BC)
+    assertParseFormatEquals("-0005-05-05T05:05:05Z", null);
+    assertParseFormatEquals("-2021-12-01T04:04:04Z", null);
+    assertParseFormatEquals("-12021-12-01T02:02:02Z", null);
+
+    // dates that only parse thanks to lenient mode of DateTimeFormatter
+    assertParseFormatEquals("10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z"); //
missing '+' 5 digit year
+    assertParseFormatEquals("995-1-2T3:4:5Z", "0995-01-02T03:04:05Z"); // wasn't 0 padded
+  }
+
+  private static void assertParseFormatEquals(String inputStr, String expectedStr) throws
ParseException {
+    if (expectedStr == null) {
+      expectedStr = inputStr;
+    }
+    Date inputDate = ExtractionDateUtil.parseDate(inputStr);
+    String resultStr = inputDate.toInstant().toString();
+    assertEquals("d:" + inputDate.getTime(), expectedStr, resultStr);
+  }
+
+  public void testParseDate() throws ParseException {
+    assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
+  }
+
+  private static void assertParsedDate(long ts, String dateStr) throws ParseException {
+    long parsed = ExtractionDateUtil.parseDate(dateStr).getTime();
+    assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)",
ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
index a5c73bc..00045b2 100644
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
+++ b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
@@ -29,13 +29,19 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.TreeMap;
 
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.io.Closeables;
+import com.typesafe.config.Config;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
 import org.apache.solr.common.params.MultiMapSolrParams;
 import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.DateUtil;
 import org.apache.solr.common.util.SuppressForbidden;
 import org.apache.solr.handler.extraction.ExtractingParams;
+import org.apache.solr.handler.extraction.ExtractionDateUtil;
 import org.apache.solr.handler.extraction.SolrContentHandler;
 import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
 import org.apache.solr.morphlines.solr.SolrLocator;
@@ -50,7 +56,6 @@ import org.apache.tika.sax.XHTMLContentHandler;
 import org.apache.tika.sax.xpath.Matcher;
 import org.apache.tika.sax.xpath.MatchingContentHandler;
 import org.apache.tika.sax.xpath.XPathParser;
-
 import org.kitesdk.morphline.api.Command;
 import org.kitesdk.morphline.api.CommandBuilder;
 import org.kitesdk.morphline.api.MorphlineCompilationException;
@@ -63,13 +68,6 @@ import org.kitesdk.morphline.stdio.AbstractParser;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
-import com.google.common.io.Closeables;
-import com.typesafe.config.Config;
-
 /**
  * Command that pipes the first attachment of a record into one of the given Tika parsers,
then maps
  * the Tika output back to a record using SolrCell.
@@ -151,7 +149,7 @@ public final class SolrCellBuilder implements CommandBuilder {
         cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
       }
       
-      this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(DateUtil.DEFAULT_DATE_FORMATS));
+      this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(ExtractionDateUtil.DEFAULT_DATE_FORMATS));
       
       String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
       Class<? extends SolrContentHandlerFactory> factoryClass;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
index 9a43b42..2cdbd3c 100644
--- a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
+++ b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
@@ -25,7 +25,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.lucene.util.Constants;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.common.util.DateUtil;
+import org.apache.solr.handler.extraction.ExtractionDateUtil;
 import org.apache.solr.handler.extraction.SolrContentHandler;
 import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
 import org.apache.solr.schema.IndexSchema;
@@ -270,7 +270,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase
{
     // which will cause the ContentHandler to be invoked.
     metadata.set(fieldName, getFoobarWithNonChars());
     StripNonCharSolrContentHandlerFactory contentHandlerFactory =
-      new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS);
+      new StripNonCharSolrContentHandlerFactory(ExtractionDateUtil.DEFAULT_DATE_FORMATS);
     IndexSchema schema = h.getCore().getLatestSchema();
     SolrContentHandler contentHandler =
       contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()),
schema);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java
deleted file mode 100644
index 9bf7a2b..0000000
--- a/solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.common.util;
-import java.io.IOException;
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Calendar;
-import java.util.Collection;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.TimeZone;
-
-
-/**
- * This class has some code from HttpClient DateUtil.
- */
-public class DateUtil {
-  //start HttpClient
-  /**
-   * Date format pattern used to parse HTTP date headers in RFC 1123 format.
-   */
-  public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss zzz";
-
-  /**
-   * Date format pattern used to parse HTTP date headers in RFC 1036 format.
-   */
-  public static final String PATTERN_RFC1036 = "EEEE, dd-MMM-yy HH:mm:ss zzz";
-
-  /**
-   * Date format pattern used to parse HTTP date headers in ANSI C
-   * <code>asctime()</code> format.
-   */
-  public static final String PATTERN_ASCTIME = "EEE MMM d HH:mm:ss yyyy";
-  //These are included for back compat
-  private static final Collection<String> DEFAULT_HTTP_CLIENT_PATTERNS = Arrays.asList(
-          PATTERN_ASCTIME, PATTERN_RFC1036, PATTERN_RFC1123);
-
-  private static final Date DEFAULT_TWO_DIGIT_YEAR_START;
-
-  static {
-    Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
-    calendar.set(2000, Calendar.JANUARY, 1, 0, 0);
-    DEFAULT_TWO_DIGIT_YEAR_START = calendar.getTime();
-  }
-
-  private static final TimeZone GMT = TimeZone.getTimeZone("GMT");
-
-  //end HttpClient
-
-  //---------------------------------------------------------------------------------------
-
-  /**
-   * A suite of default date formats that can be parsed, and thus transformed to the Solr
specific format
-   */
-  public static final Collection<String> DEFAULT_DATE_FORMATS = new ArrayList<>();
-
-  static {
-    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss'Z'");
-    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss");
-    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd");
-    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd hh:mm:ss");
-    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd HH:mm:ss");
-    DEFAULT_DATE_FORMATS.add("EEE MMM d hh:mm:ss z yyyy");
-    DEFAULT_DATE_FORMATS.addAll(DEFAULT_HTTP_CLIENT_PATTERNS);
-  }
-
-  /**
-   * Returns a formatter that can be use by the current thread if needed to
-   * convert Date objects to the Internal representation.
-   *
-   * @param d The input date to parse
-   * @return The parsed {@link java.util.Date}
-   * @throws java.text.ParseException If the input can't be parsed
-   */
-  public static Date parseDate(String d) throws ParseException {
-    return parseDate(d, DEFAULT_DATE_FORMATS);
-  }
-
-  public static Date parseDate(String d, Collection<String> fmts) throws ParseException
{
-    // 2007-04-26T08:05:04Z
-    if (d.endsWith("Z") && d.length() > 20) {
-      return getThreadLocalDateFormat().parse(d);
-    }
-    return parseDate(d, fmts, null);
-  }
-
-  /**
-   * Slightly modified from org.apache.commons.httpclient.util.DateUtil.parseDate
-   * <p>
-   * Parses the date value using the given date formats.
-   *
-   * @param dateValue   the date value to parse
-   * @param dateFormats the date formats to use
-   * @param startDate   During parsing, two digit years will be placed in the range
-   *                    <code>startDate</code> to <code>startDate + 100
years</code>. This value may
-   *                    be <code>null</code>. When <code>null</code>
is given as a parameter, year
-   *                    <code>2000</code> will be used.
-   * @return the parsed date
-   * @throws ParseException if none of the dataFormats could parse the dateValue
-   */
-  public static Date parseDate(
-          String dateValue,
-          Collection<String> dateFormats,
-          Date startDate
-  ) throws ParseException {
-
-    if (dateValue == null) {
-      throw new IllegalArgumentException("dateValue is null");
-    }
-    if (dateFormats == null) {
-      dateFormats = DEFAULT_HTTP_CLIENT_PATTERNS;
-    }
-    if (startDate == null) {
-      startDate = DEFAULT_TWO_DIGIT_YEAR_START;
-    }
-    // trim single quotes around date if present
-    // see issue #5279
-    if (dateValue.length() > 1
-            && dateValue.startsWith("'")
-            && dateValue.endsWith("'")
-            ) {
-      dateValue = dateValue.substring(1, dateValue.length() - 1);
-    }
-
-    SimpleDateFormat dateParser = null;
-    Iterator formatIter = dateFormats.iterator();
-
-    while (formatIter.hasNext()) {
-      String format = (String) formatIter.next();
-      if (dateParser == null) {
-        dateParser = new SimpleDateFormat(format, Locale.ENGLISH);
-        dateParser.setTimeZone(GMT);
-        dateParser.set2DigitYearStart(startDate);
-      } else {
-        dateParser.applyPattern(format);
-      }
-      try {
-        return dateParser.parse(dateValue);
-      } catch (ParseException pe) {
-        // ignore this exception, we will try the next format
-      }
-    }
-
-    // we were unable to parse the date
-    throw new ParseException("Unable to parse the date " + dateValue, 0);
-  }
-
-
-  /**
-   * Returns a formatter that can be use by the current thread if needed to
-   * convert Date objects to the Internal representation.
-   *
-   * @return The {@link java.text.DateFormat} for the current thread
-   */
-  public static DateFormat getThreadLocalDateFormat() {
-    return fmtThreadLocal.get();
-  }
-
-  public static TimeZone UTC = TimeZone.getTimeZone("UTC");
-  private static ThreadLocalDateFormat fmtThreadLocal = new ThreadLocalDateFormat();
-
-  private static class ThreadLocalDateFormat extends ThreadLocal<DateFormat> {
-    DateFormat proto;
-
-    public ThreadLocalDateFormat() {
-      super();
-      //2007-04-26T08:05:04Z
-      SimpleDateFormat tmp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
-      tmp.setTimeZone(UTC);
-      proto = tmp;
-    }
-
-    @Override
-    protected DateFormat initialValue() {
-      return (DateFormat) proto.clone();
-    }
-  }
-
-  /** Formats the date and returns the calendar instance that was used (which may be reused)
*/
-  public static Calendar formatDate(Date date, Calendar cal, Appendable out) throws IOException
{
-    // using a stringBuilder for numbers can be nice since
-    // a temporary string isn't used (it's added directly to the
-    // builder's buffer.
-
-    StringBuilder sb = out instanceof StringBuilder ? (StringBuilder)out : new StringBuilder();
-    if (cal==null) cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
-    cal.setTime(date);
-
-    int i = cal.get(Calendar.YEAR);
-    sb.append(i);
-    sb.append('-');
-    i = cal.get(Calendar.MONTH) + 1;  // 0 based, so add 1
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append('-');
-    i=cal.get(Calendar.DAY_OF_MONTH);
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append('T');
-    i=cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append(':');
-    i=cal.get(Calendar.MINUTE);
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append(':');
-    i=cal.get(Calendar.SECOND);
-    if (i<10) sb.append('0');
-    sb.append(i);
-    i=cal.get(Calendar.MILLISECOND);
-    if (i != 0) {
-      sb.append('.');
-      if (i<100) sb.append('0');
-      if (i<10) sb.append('0');
-      sb.append(i);
-
-      // handle canonical format specifying fractional
-      // seconds shall not end in '0'.  Given the slowness of
-      // integer div/mod, simply checking the last character
-      // is probably the fastest way to check.
-      int lastIdx = sb.length()-1;
-      if (sb.charAt(lastIdx)=='0') {
-        lastIdx--;
-        if (sb.charAt(lastIdx)=='0') {
-          lastIdx--;
-        }
-        sb.setLength(lastIdx+1);
-      }
-
-    }
-    sb.append('Z');
-
-    if (out != sb)
-      out.append(sb);
-
-    return cal;
-  }
-
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e5fd662/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java b/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
deleted file mode 100644
index 1eab4f5..0000000
--- a/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.common.util;
-
-import java.text.ParseException;
-import java.util.Locale;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestDateUtil extends LuceneTestCase {
-
-  public void testParseDate() throws ParseException {
-    assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
-  }
-    
-  private static void assertParsedDate(long ts, String dateStr) throws ParseException {
-    long parsed = DateUtil.parseDate(dateStr).getTime();
-    assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)",
ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
-  }
-
-}


Mime
View raw message