hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Apache Wiki <wikidi...@apache.org>
Subject [Hadoop Wiki] Update of "DistributedLucene" by MarkButler
Date Thu, 15 May 2008 16:47:17 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Hadoop Wiki" for change notification.

The following page has been changed by MarkButler:
http://wiki.apache.org/hadoop/DistributedLucene

------------------------------------------------------------------------------
  
  Doug Cutting's original proposal: http://www.mail-archive.com/general@lucene.apache.org/msg00338.html
  
- == The key classes for the API ==
+ Code for this work is now available here:
+ https://issues.apache.org/jira/browse/HADOOP-3394
  
+ Also see
+ Bailey project - http://www.sourceforge.net/projects/bailey
+ Katta project - http://www.sourceforge.net/projects/katta
+ Contrib for updating indexes using MapReduce - https://issues.apache.org/jira/browse/HADOOP-2951
- {{{
- public class IndexVersion implements Comparable<IndexVersion>, Writable, Constants
{
-   public IndexVersion(String name);
-   public IndexVersion nextVersion();
-   public String getName();
-   public int getVersion();
- }
- }}}
  
+ === Implementation Notes (Obsolete, retained for comments) ===
- {{{
- public class IndexLocation implements Comparable<IndexLocation>, Writable {
-   public IndexLocation(InetSocketAddress address, IndexVersion indexVersion, IndexState
state);
-   public IndexLocation nextVersion();
-   public InetSocketAddress getAddress();
-   public IndexVersion getIndexVersion();
-   public IndexState getState();
-   public void setState(IndexState state);
- }
- }}}
- 
- {{{
- public class DataNodeConfiguration implements Writable, Constants { 
-   public DataNodeConfiguration(Configuration conf, String machineName, int port, String
rack);
-   public Configuration getConfiguration();
-   public InetSocketAddress getAddress();
-   public String getRack();
- } 
- }}}
- 
- {{{
- public class DataNodeStatusInformation implements Writable {
-   public DataNodeStatusInformation();
-   public DataNodeConfiguration getDataNodeConfiguration();
-   public long getCapacity();
-   public long getCapacityUsed();
-   public long getCapacityRemaining();
-   public int getNumberReplicationTask();
- }
- }}}
- 
- {{{
- public class SearchResults implements Writable {
-   public int size();
-   public Document get(int d);
- }}}
- 
- {{{
-   public enum IndexState {
-     UNCOMMITTED, REPLICATING, LIVE, UNKNOWN
-   };
- }}}
- 
- 
- == The revised interfaces ==
- 
- === Client to Data node ===
- 
- {{{
- public interface ClientToDataNodeProtocol extends VersionedProtocol {
-   void addDocument(String index, Document doc);
-   int removeDocuments(String index, Term term); 
-   IndexVersion commitVersion(String index);
-   void createIndex(String index); 
-   void addIndex(String index, IndexLocation indexToAdd);
-   SearchResults search(IndexVersion i, Query query, Sort sort, int n);
- }
- }}}
- 
- === Client to Name node ===
- 
- {{{
- public interface ClientToNameNodeProtocol extends VersionedProtocol {
-   IndexLocation[] getSearchableIndexes();
-   IndexLocation getUpdateableIndex(String id);
-   public String getDataNode() ; // get a random data node
- }
- }}}
- 
- === Data node to Data node ===
- 
- {{{
- public interface DataNodeToDataNodeProtocol extends VersionedProtocol {
-   String[] getFileSet(IndexVersion indexVersion);
-   byte[] getFileContent(IndexVersion indexVersion, String file);
- }
- }}}
- 
- === Data node to Name node ===
- 
- {{{
- public interface DataNodeToNameNodeProtocol extends VersionedProtocol {
-   public IndexLocation[] heartbeat(DataNodeStatusInformation datanode,
-       IndexLocation[] searchableIndexes);
- }
- }}}
- 
- === Client API ===
- 
- {{{
- public interface ClientAPI {
-   void createIndex(String index, boolean sharded);
-   String[] getIndexes();
-   void addDocument(String index, Document doc);
-   int removeDocuments(String index, Term term);
-   void commit(String index);
-   SearchResults search(String index, Query query, Sort sort, int n);
- }
- }}}
- 
- === Implementation Notes ===
  
  === Current Status ===
  
- Currently there is an alpha implementation of the design outlined above specifically the
master, worker, client library and unit tests. This is awaiting review by HP for contribution
to the Apache Foundation.
+ Currently there is an alpha implementation of the design outlined above, specifically the
master, worker, client library and unit tests. 
  
  Rather than using HDFS directly, the implementation (DLucene) is heavily inspired by it. This is
because the files used in Lucene indexes are quite different from the files that HDFS was
designed for. It uses a similar replication algorithm to HDFS, and where possible uses HDFS
code, although it was necessary to make some local changes to the visibility of some classes
and methods. 
  

Mime
View raw message