lucene-pylucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From va...@apache.org
Subject svn commit: r1422426 - /lucene/pylucene/trunk/test/test_PyLucene.py
Date Sun, 16 Dec 2012 00:28:13 GMT
Author: vajda
Date: Sun Dec 16 00:28:12 2012
New Revision: 1422426

URL: http://svn.apache.org/viewvc?rev=1422426&view=rev
Log:
migrated test_PyLucene.py

Modified:
    lucene/pylucene/trunk/test/test_PyLucene.py

Modified: lucene/pylucene/trunk/test/test_PyLucene.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/test/test_PyLucene.py?rev=1422426&r1=1422425&r2=1422426&view=diff
==============================================================================
--- lucene/pylucene/trunk/test/test_PyLucene.py (original)
+++ lucene/pylucene/trunk/test/test_PyLucene.py Sun Dec 16 00:28:12 2012
@@ -13,9 +13,23 @@
 # ====================================================================
 
 import os, shutil
+import lucene   # so that 'org' and 'java' are found
 
 from unittest import TestCase, main
-from lucene import *
+
+from java.io import File, StringReader
+from org.apache.lucene.analysis.core import WhitespaceAnalyzer
+from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
+from org.apache.lucene.analysis.standard import StandardAnalyzer
+from org.apache.lucene.document import \
+    Document, Field, StoredField, StringField, TextField
+from org.apache.lucene.index import \
+    IndexWriter, IndexWriterConfig, DirectoryReader, MultiFields, Term
+from org.apache.lucene.queryparser.classic import \
+    MultiFieldQueryParser, QueryParser
+from org.apache.lucene.search import BooleanClause, IndexSearcher, TermQuery
+from org.apache.lucene.store import MMapDirectory, SimpleFSDirectory
+from org.apache.lucene.util import BytesRefIterator, Version
 
 
 class Test_PyLuceneBase(object):
@@ -29,15 +43,24 @@ class Test_PyLuceneBase(object):
     def closeStore(self, store, *args):
         pass
 
-    def getWriter(self, store, analyzer, create=False):
-        writer = IndexWriter(store, analyzer, create,
-                             IndexWriter.MaxFieldLength.LIMITED)
-        #writer.setUseCompoundFile(False)
+    def getWriter(self, store, analyzer=None, create=False):
+
+        if analyzer is None:
+            analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
+        analyzer = LimitTokenCountAnalyzer(analyzer, 10000)
+        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
+        if create:
+            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
+        writer = IndexWriter(store, config)
+
         return writer
 
     def getReader(self, store, analyzer):
         pass
 
+    def getSearcher(self, store):
+        return IndexSearcher(DirectoryReader.open(store))
+
     def test_indexDocument(self):
 
         store = self.openStore()
@@ -48,15 +71,15 @@ class Test_PyLuceneBase(object):
 
             doc = Document()
             doc.add(Field("title", "value of testing",
-                          Field.Store.YES, Field.Index.ANALYZED))
+                          TextField.TYPE_STORED))
             doc.add(Field("docid", str(1),
-                          Field.Store.NO, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_NOT_STORED))
             doc.add(Field("owner", "unittester",
-                          Field.Store.YES, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_STORED))
             doc.add(Field("search_name", "wisdom",
-                          Field.Store.YES, Field.Index.NO))
+                          StoredField.TYPE))
             doc.add(Field("meta_words", "rabbits are beautiful",
-                          Field.Store.NO, Field.Index.ANALYZED))
+                          TextField.TYPE_NOT_STORED))
         
             writer.addDocument(doc)
         finally:
@@ -72,15 +95,15 @@ class Test_PyLuceneBase(object):
         
             doc = Document()
             doc.add(Field("title", "value of testing",
-                          Field.Store.YES, Field.Index.ANALYZED))
+                          TextField.TYPE_STORED))
             doc.add(Field("docid", str(1),
-                          Field.Store.NO, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_NOT_STORED))
             doc.add(Field("owner", "unittester",
-                          Field.Store.YES, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_STORED))
             doc.add(Field("search_name", "wisdom",
-                          Field.Store.YES, Field.Index.NO))
+                          StoredField.TYPE))
             doc.add(Field("meta_words", "rabbits are beautiful",
-                          Field.Store.NO, Field.Index.ANALYZED))
+                          TextField.TYPE_NOT_STORED))
 
             body_text = "hello world" * 20
             body_reader = StringReader(body_text)
@@ -100,15 +123,15 @@ class Test_PyLuceneBase(object):
         
             doc = Document()
             doc.add(Field("title", "value of testing",
-                          Field.Store.YES, Field.Index.ANALYZED))
+                          TextField.TYPE_STORED))
             doc.add(Field("docid", str(1),
-                          Field.Store.NO, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_NOT_STORED))
             doc.add(Field("owner", "unittester",
-                          Field.Store.YES, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_STORED))
             doc.add(Field("search_name", "wisdom",
-                          Field.Store.YES, Field.Index.NO))
+                          StoredField.TYPE))
             doc.add(Field("meta_words", "rabbits are beautiful",
-                          Field.Store.NO, Field.Index.ANALYZED))
+                          TextField.TYPE_NOT_STORED))
 
             # using a unicode body cause problems, which seems very odd
             # since the python type is the same regardless affter doing
@@ -128,13 +151,13 @@ class Test_PyLuceneBase(object):
         store = self.openStore()
         searcher = None
         try:
-            searcher = IndexSearcher(store, True)
+            searcher = self.getSearcher(store)
             query = QueryParser(Version.LUCENE_CURRENT, "title",
                                 self.getAnalyzer()).parse("value")
             topDocs = searcher.search(query, 50)
             self.assertEqual(topDocs.totalHits, 1)
         finally:
-            self.closeStore(store, searcher)
+            self.closeStore(store)
 
     def test_searchDocumentsWithMultiField(self):
         """
@@ -145,7 +168,7 @@ class Test_PyLuceneBase(object):
         store = self.openStore()
         searcher = None
         try:
-            searcher = IndexSearcher(store, True)
+            searcher = self.getSearcher(store)
             SHOULD = BooleanClause.Occur.SHOULD
             query = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT,
                                                 "value", ["title", "docid"],
@@ -154,7 +177,7 @@ class Test_PyLuceneBase(object):
             topDocs = searcher.search(query, 50)
             self.assertEquals(1, topDocs.totalHits)
         finally:
-            self.closeStore(store, searcher)
+            self.closeStore(store)
         
     def test_removeDocument(self):
 
@@ -162,53 +185,53 @@ class Test_PyLuceneBase(object):
 
         store = self.openStore()
         searcher = None
-        reader = None
+        writer = None
 
         try:
-            searcher = IndexSearcher(store, True)
+            searcher = self.getSearcher(store)
             query = TermQuery(Term("docid", str(1)))
             topDocs = searcher.search(query, 50)
             self.assertEqual(topDocs.totalHits, 1)
             # be careful with ids they are ephemeral
             docid = topDocs.scoreDocs[0].doc
         
-            reader = IndexReader.open(store, False)
-            reader.deleteDocument(docid)
+            writer = self.getWriter(store)
+            writer.deleteDocuments(Term("docid", str(1)))
         finally:
-            self.closeStore(store, searcher, reader)
+            self.closeStore(store, writer)
 
         store = self.openStore()
         searcher = None
         try:
-            searcher = IndexSearcher(store, True)
+            searcher = self.getSearcher(store)
             query = TermQuery(Term("docid", str(1)))
             topDocs = searcher.search(query, 50)
             self.assertEqual(topDocs.totalHits, 0)
         finally:
-            self.closeStore(store, searcher)
+            self.closeStore(store)
         
     def test_removeDocuments(self):
 
         self.test_indexDocument()
 
         store = self.openStore()
-        reader = None
+        writer = None
         try:
-            reader = IndexReader.open(store, False)
-            reader.deleteDocuments(Term('docid', str(1)))
+            writer = self.getWriter(store)
+            writer.deleteDocuments(Term('docid', str(1)))
         finally:
-            self.closeStore(store, reader)
+            self.closeStore(store, writer)
         
         store = self.openStore()
         searcher = None
         try:
-            searcher = IndexSearcher(store, True)
+            searcher = self.getSearcher(store)
             query = QueryParser(Version.LUCENE_CURRENT, "title",
                                 self.getAnalyzer()).parse("value")
             topDocs = searcher.search(query, 50)
             self.assertEqual(topDocs.totalHits, 0)
         finally:
-            self.closeStore(store, searcher)
+            self.closeStore(store)
         
     def test_FieldEnumeration(self):
 
@@ -222,25 +245,25 @@ class Test_PyLuceneBase(object):
             writer = self.getWriter(store, analyzer, False)
             doc = Document()
             doc.add(Field("title", "value of testing",
-                          Field.Store.YES, Field.Index.ANALYZED))
+                          TextField.TYPE_STORED))
             doc.add(Field("docid", str(2),
-                          Field.Store.NO, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_NOT_STORED))
             doc.add(Field("owner", "unittester",
-                          Field.Store.YES, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_STORED))
             doc.add(Field("search_name", "wisdom",
-                          Field.Store.YES, Field.Index.NO))
+                          StoredField.TYPE))
             doc.add(Field("meta_words", "rabbits are beautiful",
-                          Field.Store.NO, Field.Index.ANALYZED))
+                          TextField.TYPE_NOT_STORED))
                                    
             writer.addDocument(doc)
         
             doc = Document()
             doc.add(Field("owner", "unittester",
-                          Field.Store.NO, Field.Index.NOT_ANALYZED))
+                          StringField.TYPE_NOT_STORED))
             doc.add(Field("search_name", "wisdom",
-                          Field.Store.YES, Field.Index.NO))
+                          StoredField.TYPE))
             doc.add(Field("meta_words", "rabbits are beautiful",
-                          Field.Store.NO, Field.Index.ANALYZED))
+                          TextField.TYPE_NOT_STORED))
             writer.addDocument(doc)        
         finally:
             self.closeStore(store, writer)
@@ -248,11 +271,10 @@ class Test_PyLuceneBase(object):
         store = self.openStore()
         reader = None
         try:
-            reader = IndexReader.open(store, True)
-            term_enum = MultiFields.getTerms(reader, "docid").iterator()
+            reader = DirectoryReader.open(store)
+            term_enum = MultiFields.getTerms(reader, "docid").iterator(None)
             docids = []
-
-            for term in term_enum:
+            for term in BytesRefIterator.cast_(term_enum):
                 docids.append(term.utf8ToString())
             self.assertEqual(len(docids), 2)
         finally:
@@ -265,17 +287,17 @@ class Test_PyLuceneBase(object):
         store = self.openStore()
         reader = None
         try:
-            reader = IndexReader.open(store, True)
-            fieldInfos = ReaderUtil.getMergedFieldInfos(reader)
+            reader = DirectoryReader.open(store)
+            fieldInfos = MultiFields.getMergedFieldInfos(reader)
             for fieldInfo in fieldInfos.iterator():
                 self.assert_(fieldInfo.name in ['owner', 'search_name',
                                                 'meta_words', 'docid', 'title'])
         
-                if fieldInfo.isIndexed:
+                if fieldInfo.isIndexed():
                     self.assert_(fieldInfo.name in ['owner', 'meta_words',
                                                     'docid', 'title'])
 
-                if fieldInfo.isIndexed and not fieldInfo.storeTermVector:
+                if fieldInfo.isIndexed() and not fieldInfo.hasVectors():
                     self.assert_(fieldInfo.name in ['owner', 'meta_words',
                                                     'docid', 'title'])
         finally:



Mime
View raw message