lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nam...@apache.org
Subject [lucene-solr] branch master updated: LUCENE-9544: Port Nori dictionary compilation (#1926)
Date Mon, 28 Sep 2020 11:28:33 GMT
This is an automated email from the ASF dual-hosted git repository.

namgyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 00d7f5e  LUCENE-9544: Port Nori dictionary compilation (#1926)
00d7f5e is described below

commit 00d7f5ea68d8eaec618e4019714fda02060539a6
Author: Namgyu Kim <namgyu@apache.org>
AuthorDate: Mon Sep 28 20:28:21 2020 +0900

    LUCENE-9544: Port Nori dictionary compilation (#1926)
---
 build.gradle                  |  1 +
 gradle/generation/nori.gradle | 84 +++++++++++++++++++++++++++++++++++++++++++
 lucene/CHANGES.txt            |  2 ++
 3 files changed, 87 insertions(+)

diff --git a/build.gradle b/build.gradle
index 00e04d3..9826b57 100644
--- a/build.gradle
+++ b/build.gradle
@@ -150,6 +150,7 @@ apply from: file('gradle/generation/javacc.gradle')
 apply from: file('gradle/generation/util.gradle')
 apply from: file('gradle/generation/snowball.gradle')
 apply from: file('gradle/generation/kuromoji.gradle')
+apply from: file('gradle/generation/nori.gradle')
 
 // Additional development aids.
 apply from: file('gradle/maven/maven-local.gradle')
diff --git a/gradle/generation/nori.gradle b/gradle/generation/nori.gradle
new file mode 100644
index 0000000..eb6afa1
--- /dev/null
+++ b/gradle/generation/nori.gradle
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This downloads and compiles Nori dictionaries.
+
+def recompileDictionary(project, dictionaryName, Closure closure) {  // Runs DictionaryBuilder via project.javaexec; 'closure' adds caller-specific exec settings.
+  project.javaexec {
+    main = "org.apache.lucene.analysis.ko.util.DictionaryBuilder"
+    classpath = project.sourceSets.main.runtimeClasspath  // the given project's own main runtime classpath
+
+    jvmArgs '-Xmx1G'  // cap the builder JVM's heap at 1 GB
+
+    with closure  // apply the caller-supplied configuration (the caller in this file appends 'args')
+  }
+  project.logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")  // dictionaryName is used only for this log line
+}
+
+configure(project(":lucene:analysis:nori")) {  // everything below applies to the Nori analysis module only
+  apply plugin: 'java-library'
+  apply plugin: "de.undercouch.download"  // presumably the source of the 'Download' task type used below; confirm
+
+  ext {
+    targetDir = file("src/resources")  // passed to DictionaryBuilder as its second argument (see compileMecabKo)
+  }
+
+  task deleteDictionaryData() {
+    // There should really be just one resources directory, but since we don't
+    // know which one it'll be, delete the *.dat files from all of them.
+    doFirst {
+      sourceSets.main.resources.srcDirs.each { location ->
+        delete fileTree(dir: location, include: "org/apache/lucene/analysis/ko/dict/*.dat")
+      }
+    }
+  }
+
+  task compileMecabKo(type: Download) {
+    description "Recompile dictionaries from Mecab-Ko data."
+    group "generation"
+
+    dependsOn deleteDictionaryData  // old *.dat files are removed before new ones are generated
+    dependsOn sourceSets.main.runtimeClasspath  // recompileDictionary runs DictionaryBuilder from this classpath
+
+    def dictionaryName = "mecab-ko-dic-2.0.3-20170922"
+    def dictionarySource = "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${dictionaryName}.tar.gz"
+    def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")  // local copy of the downloaded archive
+    def unpackedDir = file("${buildDir}/generate/${dictionaryName}")  // where the archive is extracted
+
+    src dictionarySource
+    dest dictionaryFile
+    onlyIfModified true
+
+    doLast {
+      // Unpack the downloaded archive into a freshly emptied directory.
+      delete unpackedDir
+      ant.untar(src: dictionaryFile, dest: unpackedDir, compression: "gzip") {
+        ant.cutdirsmapper(dirs: "1")  // drop the first path segment of every archive entry
+      }
+
+      // Compile the dictionary; the arguments below are passed positionally to DictionaryBuilder.
+      recompileDictionary(project, dictionaryName, {
+        args += [
+            unpackedDir,
+            targetDir,
+            "utf-8",
+            false  // NOTE(review): meaning of this flag is defined by DictionaryBuilder, not visible here; confirm
+        ]
+      })
+    }
+  }
+}
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4fffed0..383ac84 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -184,6 +184,8 @@ Other
 
 * LUCENE-9497: Integrate Error Prone, a static analysis tool during compilation (Dawid Weiss, Varun Thacker)
 
+* LUCENE-9544: add regenerate gradle script for nori dictionary (Namgyu Kim)
+
 ======================= Lucene 8.7.0 =======================
 
 API Changes


Mime
View raw message