lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Lingviston <vladislav.ermo...@outlook.com>
Subject How to get hits coordinates in Lucene 4.4.0
Date Mon, 12 Aug 2013 08:02:12 GMT
Hi, I'm trying to use Lucene in my Android project. To start with, I've
created a small demo app. It works with .txt files, but I need to work with
.pdf. Analyzing my code, I understand that it will have some issues with
.pdf files due to memory management. However, the question I want to ask here
is not related to memory but to hit highlighting. It works now, but using the
`Highlighter` class with PDFs is not what I want. To implement my own
highlighting I need to know some kind of coordinates (offsets) of the found
words in the text. How can I get them? I'm using Lucene 4.4.0, while all of
the examples I have found are for much older versions. Here is my code: 

    public class MainActivity extends Activity { 
   
//-----------------------------------------------------------------------------------------------------

    // 
    // Constants 
    // 
   
//-----------------------------------------------------------------------------------------------------

        /** Name of the index field that stores each indexed file's canonical path. */
        public static final String FIELD_PATH = "path"; 
        /** Name of the index field that stores each indexed file's full text. */
        public static final String FIELD_CONTENTS = "contents"; 
        
   
//-----------------------------------------------------------------------------------------------------

    // 
    // Fields 
    // 
   
//-----------------------------------------------------------------------------------------------------

        /** Search input; its editor "search" action triggers a search. */
        private EditText mEditText; 
        /** Output view that displays the search results rendered from HTML. */
        private TextView mTextView; 
        
   
//-----------------------------------------------------------------------------------------------------

    // 
    // Methods 
    // 
   
//-----------------------------------------------------------------------------------------------------

    /**
     * Sets up the activity: inflates the layout, looks up and wires the views,
     * then builds the search index.
     */
    @Override 
    protected void onCreate(Bundle savedInstanceState) { 
        super.onCreate(savedInstanceState); 
        setContentView(R.layout.activity_main); 
        // Views must be looked up before listeners can be attached to them.
        findViews(); 
        initViews(); 
        // NOTE(review): createIndex() reads every file synchronously on the
        // calling thread; consider moving it off the UI thread.
        createIndex(); 
    } 

    /** Looks up the result view and the search box in the inflated layout. */
    private void findViews() {
        this.mTextView = (TextView) findViewById(R.id.activity_main_textview);
        this.mEditText = (EditText) findViewById(R.id.activity_main_edittext);
    }
    
    /** Registers the editor-action listener on the search box. */
    private void initViews() {
        final EditText searchBox = mEditText;
        searchBox.setOnEditorActionListener(mEditorActionListener);
    }

    /**
     * Runs {@code searchString} against the index stored in the app's external
     * files directory and shows the matching files and their contents in
     * {@link #mTextView}.
     *
     * <p>At most 1000 hits are retrieved. A summary (hit count and paths) is
     * written to the debug log. The view is only updated after all hits have
     * been processed; any failure is logged and the index resources are
     * released either way.
     *
     * @param searchString query in Lucene query-parser syntax, searched against
     *                     {@link #FIELD_CONTENTS} by default
     */
    private void performSearch(String searchString) {
        final String tag = getClass().getSimpleName();
        final String separator = "-------------------------------------------------------";
        try {
            Directory directory = NIOFSDirectory.open(getExternalFilesDir(null));
            try {
                DirectoryReader ireader = DirectoryReader.open(directory);
                try {
                    IndexSearcher isearcher = new IndexSearcher(ireader);

                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
                    QueryParser queryParser =
                            new AnalyzingQueryParser(Version.LUCENE_44, FIELD_CONTENTS, analyzer);
                    Query query = queryParser.parse(searchString);
                    TopDocs topDocs = isearcher.search(query, null, 1000);
                    ScoreDoc[] docs = topDocs.scoreDocs;

                    // The scorer and highlighter depend only on the query, so build them
                    // once instead of once per hit. Empty pre/post tags mean the text is
                    // returned without markup; the fragment size of Integer.MAX_VALUE
                    // keeps the whole document in a single fragment.
                    QueryScorer scorer = new QueryScorer(query);
                    Highlighter highlighter =
                            new Highlighter(new SimpleHTMLFormatter("", ""), scorer);
                    highlighter.setTextFragmenter(
                            new SimpleSpanFragmenter(scorer, Integer.MAX_VALUE));

                    StringBuilder result = new StringBuilder();
                    StringBuilder debugInfo = new StringBuilder();
                    debugInfo.append("Number of hits: ");
                    debugInfo.append(docs.length);
                    debugInfo.append("\n");

                    // Iterate through the results:
                    for (int i = 0; i < docs.length; i++) {
                        Document hitDoc = isearcher.doc(docs[i].doc);

                        String path = hitDoc.get(FIELD_PATH);
                        debugInfo.append("Path: ");
                        debugInfo.append(path);
                        debugInfo.append("\n");

                        result.append(separator);
                        result.append("File: ");
                        result.append(path);
                        result.append(separator);
                        result.append("<br>");

                        String content = hitDoc.get(FIELD_CONTENTS);
                        String highlighted =
                                highlighter.getBestFragment(analyzer, FIELD_CONTENTS, content);
                        if (highlighted == null) {
                            // No fragment scored (e.g. the query matched another field):
                            // show the raw content rather than the literal text "null".
                            highlighted = content;
                        }
                        result.append(separator);
                        result.append("Contents: ");
                        result.append(separator);
                        result.append("<br>");
                        result.append(highlighted);
                        result.append("<br><br><br>");
                    }

                    // not working:
                    //   PostingsHighlighter highlighter = new PostingsHighlighter();
                    //   String highlights[] =
                    //       highlighter.highlight(FIELD_CONTENTS, query, isearcher, topDocs);
                    mTextView.setText(Html.fromHtml(result.toString()));
                    Log.d(tag, debugInfo.toString());
                } finally {
                    ireader.close();
                }
            } finally {
                directory.close();
            }
        } catch (Exception e) {
            // Pass the throwable itself: e.getMessage() may be null, and the stack
            // trace is more useful than the message alone.
            Log.e(tag, "Search failed for query: " + searchString, e);
        }
    }
    
    private void createIndex() { 
    try { 
    //Create directory for index. 
    Directory indexDirectory = new
NIOFSDirectory(getExternalFilesDir(null)); 
    
                        Analyzer analyzer = new
StandardAnalyzer(Version.LUCENE_44); 
                        
                        IndexWriterConfig config = new
IndexWriterConfig(Version.LUCENE_44, analyzer); 
                        config.setOpenMode(OpenMode.CREATE); 
                        
                        IndexWriter indexWriter = new
IndexWriter(indexDirectory, config); 
                        
                        //Loop through files in specified directory and
adding them to index. 
                        File dir = new
File(Environment.getExternalStorageDirectory() + "/lucene"); 
                        File[] files = dir.listFiles(); 
                        for (File file : files) { 
                                Document document = new Document(); 
        
                                { 
                                        FieldType fieldType = new
FieldType(TextField.TYPE_STORED); 
                                       
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); 
                                        
                                        String path =
file.getCanonicalPath(); 
                                        document.add(new Field(FIELD_PATH,
path, fieldType)); 
                                } 
        
                                { 
                                        FieldType fieldType = new
FieldType(TextField.TYPE_STORED); 
                                        fieldType.setIndexed(true); 
                                       
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); 
                                        fieldType.setStored(true); 
                                        fieldType.setStoreTermVectors(true); 
                                        fieldType.setTokenized(true); 
                                       
fieldType.setStoreTermVectorOffsets(true); 
                                        String content = readFully(new
FileReader(file)); //we can't store Reader objects but we need to be able to
access the content for highlighting 
                                        document.add(new
Field(FIELD_CONTENTS, content, fieldType)); 
                                } 
        
                                indexWriter.addDocument(document); 
                        } 
                        indexWriter.close(); 
    } catch (Exception e) { 
    e.printStackTrace(); 
    } 
        } 
    
    /**
     * Reads all remaining characters from {@code reader} into a string.
     *
     * <p>The reader is not closed here; the caller remains responsible for
     * closing it.
     *
     * @param reader source to drain
     * @return everything read up to end of stream; empty if the reader was
     *         already at end of stream
     * @throws IOException if reading from {@code reader} fails
     */
    public static String readFully(Reader reader) throws IOException {
        char[] chunk = new char[8 * 1024]; // 8K at a time
        // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
        StringBuilder text = new StringBuilder();
        int numChars;

        // Only -1 signals end of stream; a 0-length read must not end the loop.
        while ((numChars = reader.read(chunk, 0, chunk.length)) != -1) {
            text.append(chunk, 0, numChars);
        }

        return text.toString();
    }
    
    /**
     * Inflates the {@code R.menu.main} resource into the options menu.
     *
     * @param menu menu to populate
     * @return always {@code true}
     */
    @Override
    public boolean onCreateOptionsMenu(Menu menu) {
        this.getMenuInflater().inflate(R.menu.main, menu);
        return true;
    }
    
   
//-----------------------------------------------------------------------------------------------------

    // 
    // Listeners 
    // 
   
//-----------------------------------------------------------------------------------------------------

    /**
     * Starts a search with the view's current text when the IME "search"
     * action fires; every other editor action is left unhandled.
     */
    private OnEditorActionListener mEditorActionListener = new OnEditorActionListener() {
        @Override
        public boolean onEditorAction(TextView v, int actionId, KeyEvent event) {
            if (actionId != EditorInfo.IME_ACTION_SEARCH) {
                return false;
            }
            String queryText = v.getText().toString();
            performSearch(queryText);
            return true;
        }
    };
} 

So how can I get the hit coordinates? Any other advice about what I'm doing
wrong would also be welcome. This is a rather common task, I think, so it
must be rather simple.



--
View this message in context: http://lucene.472066.n3.nabble.com/How-to-get-hits-coordinates-in-Lucene-4-4-0-tp4083913.html
Sent from the Lucene - Java Users mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message