lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [14/33] lucenenet git commit: Lucene.Net.Benchmark: Created a simple English number formatter to spell out numbers into words. Since we don't need localization, this is a sufficient replacement for the ICU RuleBasedNumberFormatter.
Date Sun, 06 Aug 2017 17:59:12 GMT
Lucene.Net.Benchmark: Created a simple English number formatter to spell out numbers into words.
Since we don't need localization, this is a sufficient replacement for the ICU RuleBasedNumberFormatter.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1cfbd8b7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1cfbd8b7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1cfbd8b7

Branch: refs/heads/master
Commit: 1cfbd8b7c35c7f1ae2bd616b44d752eef4d7d180
Parents: a60c5ef
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Tue Aug 1 21:11:54 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Wed Aug 2 09:55:15 2017 +0700

----------------------------------------------------------------------
 .../ByTask/Feeds/LongToEnglishContentSource.cs  |   6 +-
 .../ByTask/Feeds/LongToEnglishQueryMaker.cs     |   4 +-
 .../Lucene.Net.Benchmark.csproj                 |   1 +
 .../Support/EnglishNumberFormatExtensions.cs    | 186 +++++++++++++++++++
 .../Lucene.Net.Tests.Benchmark.csproj           |   1 +
 .../TestEnglishNumberFormatExtensions.cs        |  38 ++++
 6 files changed, 231 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
index fadab82..7c407a2 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
@@ -1,4 +1,5 @@
-using System;
+using Lucene.Net.Support;
+using System;
 using System.Globalization;
 
 namespace Lucene.Net.Benchmarks.ByTask.Feeds
@@ -55,8 +56,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
                     }
                 }
 
-                // LUCENENET TODO: Rules based number formatting...(from ICU)
-                docData.Body = curCounter.ToString(); //rnbf.format(curCounter);
+                docData.Body = curCounter.ToWords(); //rnbf.format(curCounter);
                 docData.Name = "doc_" + curCounter.ToString(CultureInfo.InvariantCulture);
                 docData.Title = "title_" + curCounter.ToString(CultureInfo.InvariantCulture);
                 docData.SetDate(new DateTime());

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
index f565eb8..78ac924 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
@@ -4,6 +4,7 @@ using Lucene.Net.Benchmarks.ByTask.Tasks;
 using Lucene.Net.Benchmarks.ByTask.Utils;
 using Lucene.Net.QueryParsers.Classic;
 using Lucene.Net.Search;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 
@@ -48,9 +49,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
         {
             lock (this)
             {
-                // LUCENENET TODO: Rules based number formatter (from ICU)
                 //return parser.Parse("" + rnbf.format(GetNextCounter()) + "");
-                return m_parser.Parse(GetNextCounter().ToString());
+                return m_parser.Parse(GetNextCounter().ToWords());
             }
         }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
index 0241099..f00cd18 100644
--- a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
+++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
@@ -159,6 +159,7 @@
     <Compile Include="Quality\Utils\QualityQueriesFinder.cs" />
     <Compile Include="Quality\Utils\SimpleQQParser.cs" />
     <Compile Include="Quality\Utils\SubmissionReport.cs" />
+    <Compile Include="Support\EnglishNumberFormatExtensions.cs" />
     <Compile Include="Utils\ExtractReuters.cs" />
     <Compile Include="Utils\ExtractWikipedia.cs" />
     <Compile Include="..\CommonAssemblyInfo.cs">

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs b/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs
new file mode 100644
index 0000000..71362f0
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs
@@ -0,0 +1,186 @@
+using System;
+using System.Text;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Extension methods to spell out numbers into English. 
+    /// <para/>
+    /// Inspiration: https://stackoverflow.com/a/2601001
+    /// </summary>
+    public static class EnglishNumberFormatExtensions
+    {
+        private const long Quadrillion = Trillion * 1000;
+        private const long Trillion = Billion * 1000;
+        private const long Billion = Million * 1000;
+        private const long Million = Thousand * 1000;
+        private const long Thousand = Hundred * 10;
+        private const long Hundred = 100;
+
+        /// <summary>
+        /// Returns the spelled-out English words for the provided <paramref name="value"/>.
+        /// </summary>
+        public static string ToWords(this int value)
+        {
+            return ToWords((long)value);
+        }
+
+        /// <summary>
+        /// Returns the spelled-out English words for the provided <paramref name="value"/>.
+        /// </summary>
+        public static string ToWords(this long value)
+        {
+            return ToWords(value, new StringBuilder()).ToString();
+        }
+
+        /// <summary>
+        /// Appends the words for <paramref name="value"/> ("negative " first when below zero) to <paramref name="builder"/> and returns that builder.
+        /// </summary>
+        private static StringBuilder ToWords(long value, StringBuilder builder)
+        {
+            if (value == 0) return builder.Append("zero");
+
+            if (value < 0)
+            {
+                builder.Append("negative ");
+                if (value != long.MinValue) return ToWords(-value, builder);
+                // -long.MinValue does not fit in a long (Math.Abs would throw), so negate its quadrillions and its remainder separately.
+                ToWords(-(value / Quadrillion), builder).Append(" quadrillion ");
+                return ToWords(-(value % Quadrillion), builder);
+            }
+
+            // Peel off each scale, largest first; the count of each scale is spelled out by a recursive call.
+            if (value >= Quadrillion)
+            {
+                long unit = value / Quadrillion;
+                value -= unit * Quadrillion;
+                ToWords(unit, builder);
+                builder.Append(" quadrillion");
+                if (value > 0) builder.Append(" ");
+            }
+
+            if (value >= Trillion)
+            {
+                long unit = value / Trillion;
+                value -= unit * Trillion;
+                ToWords(unit, builder);
+                builder.Append(" trillion");
+                if (value > 0) builder.Append(" ");
+            }
+
+            if (value >= Billion)
+            {
+                long unit = value / Billion;
+                value -= unit * Billion;
+                ToWords(unit, builder);
+                builder.Append(" billion");
+                if (value > 0) builder.Append(" ");
+            }
+
+            if (value >= Million)
+            {
+                long unit = value / Million;
+                value -= unit * Million;
+                ToWords(unit, builder);
+                builder.Append(" million");
+                if (value > 0) builder.Append(" ");
+            }
+
+            if (value >= Thousand)
+            {
+                long unit = value / Thousand;
+                value -= unit * Thousand;
+                ToWords(unit, builder);
+                builder.Append(" thousand");
+                if (value > 0) builder.Append(" ");
+            }
+
+            if (value >= Hundred)
+            {
+                long unit = value / Hundred;
+                value -= unit * Hundred;
+                ToWords(unit, builder);
+                builder.Append(" hundred");
+                if (value > 0) builder.Append(" ");
+            }
+
+            if (value >= 90)
+            {
+                value -= 90;
+                builder.Append("ninety");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value >= 80)
+            {
+                value -= 80;
+                builder.Append("eighty");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value >= 70)
+            {
+                value -= 70;
+                builder.Append("seventy");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value >= 60)
+            {
+                value -= 60;
+                builder.Append("sixty");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value >= 50)
+            {
+                value -= 50;
+                builder.Append("fifty");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value >= 40)
+            {
+                value -= 40;
+                builder.Append("forty");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value >= 30)
+            {
+                value -= 30;
+                builder.Append("thirty");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value >= 20)
+            {
+                value -= 20;
+                builder.Append("twenty");
+                if (value > 0) builder.Append("-");
+            }
+
+            if (value == 19) builder.Append("nineteen");
+            if (value == 18) builder.Append("eighteen");
+            if (value == 17) builder.Append("seventeen");
+            if (value == 16) builder.Append("sixteen");
+            if (value == 15) builder.Append("fifteen");
+            if (value == 14) builder.Append("fourteen");
+            if (value == 13) builder.Append("thirteen");
+            if (value == 12) builder.Append("twelve");
+            if (value == 11) builder.Append("eleven");
+            if (value == 10) builder.Append("ten");
+            if (value == 9) builder.Append("nine");
+            if (value == 8) builder.Append("eight");
+            if (value == 7) builder.Append("seven");
+            if (value == 6) builder.Append("six");
+            if (value == 5) builder.Append("five");
+            if (value == 4) builder.Append("four");
+            if (value == 3) builder.Append("three");
+            if (value == 2) builder.Append("two");
+            if (value == 1) builder.Append("one");
+
+            return builder;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj b/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
index c57a59f..5c9ffe1 100644
--- a/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
+++ b/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
@@ -68,6 +68,7 @@
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Quality\TestQualityRun.cs" />
     <Compile Include="Support\TestApiConsistency.cs" />
+    <Compile Include="Support\TestEnglishNumberFormatExtensions.cs" />
     <Compile Include="Support\TestExceptionSerialization.cs" />
   </ItemGroup>
   <ItemGroup>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs b/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs
new file mode 100644
index 0000000..68cc70a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs
@@ -0,0 +1,38 @@
+using Lucene.Net.Attributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Support
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    [TestFixture]
+    public class TestEnglishNumberFormatExtensions
+    {
+        [Test, LuceneNetSpecific]
+        public void TestToWords()
+        {
+            Assert.AreEqual("twenty-one", 21.ToWords()); // tens and ones are joined with a hyphen
+            Assert.AreEqual("one thousand two hundred thirty-four", 1234.ToWords());
+            Assert.AreEqual("six million four hundred ninety-one thousand three hundred forty-eight", 6491348.ToWords());
+            Assert.AreEqual("one hundred thirty", 130.ToWords()); // no trailing separator when nothing follows the tens
+            Assert.AreEqual("one hundred thirty-seven", 137.ToWords());
+            Assert.AreEqual("seven hundred forty-nine million one hundred thirty-two thousand one hundred forty-six", 749132146.ToWords());
+            Assert.AreEqual("nine hundred ninety-nine billion seven hundred forty-nine million one hundred thirty-two thousand one hundred forty-six", 999749132146.ToWords()); // long overload
+        }
+    }
+}


Mime
View raw message