lucene-lucene-net-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Digy" <digyd...@gmail.com>
Subject RE: Document class improvements
Date Tue, 04 May 2010 17:43:30 GMT
Hi Leo,

 

Your code seems to be fine but there are two problems with it:

* Some of the test cases fail (e.g., "TestRemoveForNewDocument"). I don't
think it is a big problem, and it can be fixed somehow,

but most importantly,

* It includes a lot of divergence from Lucene.Java, which would make life
very hard while trying to port new versions (or backports) of Lucene.Java.

 

DIGY

 

 

From: Leonardo Azize Martins [mailto:lazize@gmail.com] 
Sent: Monday, May 03, 2010 8:56 PM
To: lucene-net-dev@lucene.apache.org
Subject: Document class improvements

 

Hi,

 

I would like to share with you some improvements in Document class.

 

I changed from ArrayList to Dictionary<string, Fieldable>.

It avoids boxing/unboxing, and it is better for iterating by field name.

 

I tested it and the functionality is the same as before, but with better
performance.

I appreciate some feedback.

 

I do not know whether this list accepts attachments, so I am sending the file
both as an attachment and in the mail body (below).

 

Regards,

Leo

 

######### Document.cs ###################

/* 
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

// for javadoc
using IndexReader = Lucene.Net.Index.IndexReader;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using Searcher = Lucene.Net.Search.Searcher;

namespace Lucene.Net.Documents
{
 
 /// <summary>Documents are the unit of indexing and search.
 /// 
 /// A Document is a set of fields.  Each field has a name and a textual value.
 /// A field may be {@link Fieldable#IsStored() stored} with the document, in which
 /// case it is returned with search hits on the document.  Thus each document
 /// should typically contain one or more stored fields which uniquely identify
 /// it.
 /// 
 /// <p/>Note that fields which are <i>not</i> {@link Fieldable#IsStored() stored} are
 /// <i>not</i> available in documents retrieved from the index, e.g. with
 /// {@link ScoreDoc#doc}, {@link Searcher#Doc(int)} or
 /// {@link IndexReader#Document(int)}.
 /// </summary>
 
 [Serializable]
 public sealed class Document
 {
  /// <summary>Legacy enumerator handed out by <c>Document.Fields()</c>.
  /// It walks a flattened snapshot of every <c>Fieldable</c> held by the enclosing
  /// document; the snapshot is taken when the enumerator is created or reset.</summary>
  private class AnonymousClassEnumeration : System.Collections.IEnumerator
  {
      private Document enclosingInstance;
      private System.Object tempAuxObj;
      internal System.Collections.IEnumerator iter;

      public AnonymousClassEnumeration(Document enclosingInstance)
      {
          InitBlock(enclosingInstance);
      }

      private void  InitBlock(Document enclosingInstance)
      {
          this.enclosingInstance = enclosingInstance;
          // "fields" is a dictionary of name -> list of fields. Enumerating it directly
          // would yield KeyValuePair entries rather than Fieldable instances, so the
          // flattened list is enumerated instead.
          iter = Enclosing_Instance.fields.GetAllLists().GetEnumerator();
      }

      /// <summary>The document whose fields are being enumerated.</summary>
      public Document Enclosing_Instance
      {
          get
          {
              return enclosingInstance;
          }
      }

      /// <summary>The element fetched by the last successful <c>MoveNext()</c>,
      /// or null before the first call and after <c>Reset()</c>.</summary>
      public System.Object Current
      {
          get
          {
              return tempAuxObj;
          }
      }

      /// <summary>Advances to the next field.</summary>
      /// <returns>true if an element was fetched; false once the fields are exhausted</returns>
      public bool MoveNext()
      {
          if (!HasMoreElements())
          {
              return false;
          }
          tempAuxObj = NextElement();
          return true;
      }

      /// <summary>Rewinds the enumerator to just before the first field by taking a
      /// fresh snapshot of the document's fields.</summary>
      public void  Reset()
      {
          tempAuxObj = null;
          iter = Enclosing_Instance.fields.GetAllLists().GetEnumerator();
      }

      /// <summary>Advances the underlying iterator; note that this consumes a position.</summary>
      public bool HasMoreElements()
      {
          return iter.MoveNext();
      }

      /// <summary>Returns the element at the underlying iterator's current position.</summary>
      public System.Object NextElement()
      {
          return iter.Current;
      }
  }

        /// <summary>Name-keyed store for a document's fields: each field name maps to the
        /// list of <c>Fieldable</c>s added under that name, in the order they were added.
        /// The removal helpers drop a name as soon as its list becomes empty.
        /// Marked serializable (with the deserialization constructor the dictionary base
        /// class requires) because the enclosing <c>Document</c> is serializable and holds
        /// an instance of this type as a field.</summary>
        [Serializable]
        internal class FieldControl : System.Collections.Generic.Dictionary<string, System.Collections.Generic.List<Fieldable>>
        {
            /// <summary>Maps a <c>Fieldable</c> to a value of type T, or to null to skip it.</summary>
            public delegate T Func<T>(Fieldable item) where T : class;

            /// <summary>Creates an empty store.</summary>
            public FieldControl()
            {
            }

            /// <summary>Deserialization constructor; forwards to the dictionary base class.</summary>
            protected FieldControl(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
                : base(info, context)
            {
            }

            /// <summary>Appends a field to the list kept for its name, creating the list on first use.</summary>
            /// <param name="item">the field to add; its <c>Name()</c> is used as the key</param>
            public void Add(Fieldable item)
            {
                string key = item.Name();
                System.Collections.Generic.List<Fieldable> bucket;
                if (!TryGetValue(key, out bucket))
                {
                    bucket = new System.Collections.Generic.List<Fieldable>();
                    base.Add(key, bucket);
                }
                bucket.Add(item);
            }

            /// <summary>Removes the first field that was added under the given name.
            /// Does nothing when no field has that name.</summary>
            /// <param name="name">the field name</param>
            public void RemoveOne(string name)
            {
                System.Collections.Generic.List<Fieldable> bucket;
                if (!TryGetValue(name, out bucket))
                {
                    return;
                }

                if (bucket.Count > 0)
                {
                    bucket.RemoveAt(0);
                }

                // Never leave an empty list behind: GetFirst() and a further RemoveOne()
                // would otherwise index into it and throw ArgumentOutOfRangeException.
                if (bucket.Count == 0)
                {
                    base.Remove(name);
                }
            }

            /// <summary>Removes every field stored under the given name.
            /// Does nothing when no field has that name.</summary>
            /// <param name="name">the field name</param>
            public void RemoveAll(string name)
            {
                // Dictionary.Remove simply returns false for a missing key.
                base.Remove(name);
            }

            /// <summary>Returns the fields stored under the given name.</summary>
            /// <param name="name">the field name</param>
            /// <returns>the internal (live) list for that name, or a new empty list when
            /// the name is unknown; never null</returns>
            public System.Collections.Generic.List<Fieldable> GetList(string name)
            {
                System.Collections.Generic.List<Fieldable> bucket;
                if (TryGetValue(name, out bucket))
                {
                    return bucket;
                }
                return new System.Collections.Generic.List<Fieldable>();
            }

            /// <summary>Returns the first field added under the given name.</summary>
            /// <param name="name">the field name</param>
            /// <returns>the first field, or null when there is none</returns>
            public Fieldable GetFirst(string name)
            {
                System.Collections.Generic.List<Fieldable> bucket;
                if (TryGetValue(name, out bucket) && bucket.Count > 0)
                {
                    return bucket[0];
                }
                return null;
            }

            /// <summary>Flattens all per-name lists into one newly allocated list.
            /// Fields with the same name stay adjacent; the order between different
            /// names follows the dictionary's enumeration order.</summary>
            public System.Collections.Generic.List<Fieldable> GetAllLists()
            {
                System.Collections.Generic.List<Fieldable> all = new System.Collections.Generic.List<Fieldable>();
                foreach (System.Collections.Generic.List<Fieldable> bucket in base.Values)
                {
                    all.AddRange(bucket);
                }
                return all;
            }

            /// <summary>Applies <paramref name="constraint"/> to each field stored under
            /// <paramref name="name"/> and collects the non-null results, in field order.</summary>
            /// <param name="name">the field name</param>
            /// <param name="constraint">mapping from a field to a result, or to null to skip the field</param>
            /// <returns>a new list of results; empty when nothing matched</returns>
            public System.Collections.Generic.List<T> GetListWithConstraint<T>(string name, Func<T> constraint) where T : class
            {
                System.Collections.Generic.List<Fieldable> candidates = this.GetList(name);
                System.Collections.Generic.List<T> matches = new System.Collections.Generic.List<T>(candidates.Count);
                foreach (Fieldable candidate in candidates)
                {
                    T mapped = constraint(candidate);
                    if (mapped != null)
                    {
                        matches.Add(mapped);
                    }
                }
                return matches;
            }

            /// <summary>Applies <paramref name="constraint"/> to each field stored under
            /// <paramref name="name"/> and returns the first non-null result.</summary>
            /// <param name="name">the field name</param>
            /// <param name="constraint">mapping from a field to a result, or to null to skip the field</param>
            /// <returns>the first non-null result, or null when nothing matched</returns>
            public T GetFirstWithConstraint<T>(string name, Func<T> constraint) where T : class
            {
                foreach (Fieldable candidate in this.GetList(name))
                {
                    T mapped = constraint(candidate);
                    if (mapped != null)
                    {
                        return mapped;
                    }
                }
                return null;
            }
        }

        // Backing store for this document's fields; FieldControl keys them by field name.
        internal FieldControl fields = new FieldControl();

  // Document-level boost factor; starts at 1.0f and is read/written by GetBoost()/SetBoost(float).
  private float boost = 1.0f;
  
  /// <summary>Constructs a new document with no fields. </summary>
  public Document()
  {
  }
  
  
  /// <summary>Sets a boost factor for hits on any field of this document.  This value
  /// will be multiplied into the score of all hits on this document.
  /// 
  /// <p/>The default value is 1.0.
  /// 
  /// <p/>Values are multiplied into the value of {@link Fieldable#GetBoost()} of
  /// each field in this document.  Thus, this method in effect sets a default
  /// boost for the fields of this document.
  /// 
  /// </summary>
  /// <param name="boost">the new boost factor for this document
  /// </param>
  /// <seealso cref="Fieldable.SetBoost(float)">
  /// </seealso>
  public void  SetBoost(float boost)
  {
      this.boost = boost;
  }
  
  /// <summary>Returns, at indexing time, the boost factor as set by {@link #SetBoost(float)}.
  /// 
  /// <p/>Note that once a document is indexed this value is no longer available
  /// from the index.  At search time, for retrieved documents, this method always
  /// returns 1. This however does not mean that the boost value set at indexing
  /// time was ignored - it was just combined with other indexing time factors and
  /// stored elsewhere, for better indexing and search performance. (For more
  /// information see the "norm(t,d)" part of the scoring formula in
  /// {@link Lucene.Net.Search.Similarity Similarity}.)
  /// 
  /// </summary>
  /// <returns> the boost factor currently held by this document instance
  /// </returns>
  /// <seealso cref="SetBoost(float)">
  /// </seealso>
  public float GetBoost()
  {
      return this.boost;
  }
  
  /// <summary> <p/>Adds a field to a document.  Several fields may be added with
  /// the same name.  In this case, if the fields are indexed, their text is
  /// treated as though appended for the purposes of search.<p/>
  /// <p/> Note that add like the removeField(s) methods only makes sense
  /// prior to adding a document to an index. These methods cannot
  /// be used to change the content of an existing index! In order to achieve this,
  /// a document has to be deleted from an index and a new changed version of that
  /// document has to be added.<p/>
  /// </summary>
  /// <param name="field">the field to add
  /// </param>
  public void  Add(Fieldable field)
  {
      this.fields.Add(field);
  }
  
  /// <summary> <p/>Removes field with the specified name from the document.
  /// If multiple fields exist with this name, this method removes the first field that has been added.
  /// If there is no field with the specified name, the document remains unchanged.<p/>
  /// <p/> Note that the removeField(s) methods like the add method only make sense
  /// prior to adding a document to an index. These methods cannot
  /// be used to change the content of an existing index! In order to achieve this,
  /// a document has to be deleted from an index and a new changed version of that
  /// document has to be added.<p/>
  /// </summary>
  /// <param name="name">the name of the field to remove
  /// </param>
  public void  RemoveField(System.String name)
  {
      this.fields.RemoveOne(name);
  }
  
  /// <summary> <p/>Removes all fields with the given name from the document.
  /// If there is no field with the specified name, the document remains unchanged.<p/>
  /// <p/> Note that the removeField(s) methods like the add method only make sense
  /// prior to adding a document to an index. These methods cannot
  /// be used to change the content of an existing index! In order to achieve this,
  /// a document has to be deleted from an index and a new changed version of that
  /// document has to be added.<p/>
  /// </summary>
  /// <param name="name">the name of the fields to remove
  /// </param>
  public void  RemoveFields(System.String name)
  {
      this.fields.RemoveAll(name);
  }
  
  /// <summary>Returns a field with the given name if any exist in this document, or
  /// null.  If multiple fields exists with this name, this method returns the
  /// first value added.
  /// Do not use this method with lazy loaded fields.
  /// </summary>
  /// <param name="name">the name of the field
  /// </param>
  /// <returns> the first field with that name as a <code>Field</code>; null when there is
  /// none or when that first field is not a <code>Field</code> instance
  /// </returns>
  public Field GetField(System.String name)
  {
      Fieldable first = this.fields.GetFirst(name);
      // "as" yields null (rather than throwing) when the first field is not a Field.
      return first as Field;
  }
  
  
  /// <summary>Returns a field with the given name if any exist in this document, or
  /// null.  If multiple fields exists with this name, this method returns the
  /// first value added.
  /// </summary>
  /// <param name="name">the name of the field
  /// </param>
  public Fieldable GetFieldable(System.String name)
  {
      return this.fields.GetFirst(name);
  }
  
  /// <summary>Returns the string value of the field with the given name if any exist in
  /// this document, or null.  If multiple fields exist with this name, this
  /// method returns the first value added. If only binary fields with this name
  /// exist, returns null.
  /// </summary>
  /// <param name="name">the name of the field
  /// </param>
  public System.String Get(System.String name)
  {
      return this.fields.GetFirstWithConstraint<string>(name,
          delegate(Fieldable candidate)
          {
              // Binary fields are skipped by mapping them to null.
              return candidate.IsBinary() ? null : candidate.StringValue();
          });
  }
  
  /// <summary>Returns an Enumeration of all the fields in a document.</summary>
  /// <deprecated> use {@link #GetFields()} instead
  /// </deprecated>
  [Obsolete("Use GetFields() instead")]
  public System.Collections.IEnumerator Fields()
  {
      System.Collections.IEnumerator enumeration = new AnonymousClassEnumeration(this);
      return enumeration;
  }
  
  /// <summary>Returns a List of all the fields in a document.
  /// <p/>Note that fields which are <i>not</i> {@link Fieldable#IsStored() stored} are
  /// <i>not</i> available in documents retrieved from the
  /// index, e.g. {@link Searcher#Doc(int)} or
  /// {@link IndexReader#Document(int)}.
  /// </summary>
  /// <returns> a newly built list holding every field of this document
  /// </returns>
  public System.Collections.IList GetFields()
  {
      System.Collections.Generic.List<Fieldable> allFields = this.fields.GetAllLists();
      return allFields;
  }
  
  /// <summary> Returns an array of {@link Field}s with the given name.
  /// Do not use with lazy loaded fields.
  /// This method returns an empty array when there are no
  /// matching fields.  It never returns null.
  /// 
  /// </summary>
  /// <param name="name">the name of the field
  /// </param>
  /// <returns> a <code>Field[]</code> array
  /// </returns>
  public Field[] GetFields(System.String name)
  {
      System.Collections.Generic.List<Fieldable> matches = this.fields.GetList(name);
      Field[] result = new Field[matches.Count];
      for (int i = 0; i < result.Length; i++)
      {
          // NOTE(review): a Fieldable that is not a Field ends up as a null entry
          // here (see "Do not use with lazy loaded fields" above).
          result[i] = matches[i] as Field;
      }
      return result;
  }


  /// <summary> Returns an array of {@link Fieldable}s with the given name.
  /// This method returns an empty array when there are no
  /// matching fields.  It never returns null.
  /// 
  /// </summary>
  /// <param name="name">the name of the field
  /// </param>
  /// <returns> a <code>Fieldable[]</code> array
  /// </returns>
  public Fieldable[] GetFieldables(System.String name)
  {
      System.Collections.Generic.List<Fieldable> matches = this.fields.GetList(name);
      return matches.ToArray();
  }
  
  
  /// <summary> Returns an array of values of the field specified as the method parameter.
  /// This method returns an empty array when there are no
  /// matching fields.  It never returns null.
  /// </summary>
  /// <param name="name">the name of the field
  /// </param>
  /// <returns> a <code>String[]</code> of field values
  /// </returns>
  public System.String[] GetValues(System.String name)
  {
      System.Collections.Generic.List<string> values = this.fields.GetListWithConstraint<string>(name,
          delegate(Fieldable candidate)
          {
              // Binary fields are skipped by mapping them to null.
              return candidate.IsBinary() ? null : candidate.StringValue();
          });
      return values.ToArray();
  }
  
  // Shared empty result returned by GetBinaryValues when no binary field matches.
  private static readonly byte[][] NO_BYTES = new byte[0][];

  /// <summary> Returns an array of byte arrays for of the fields that have the name specified
  /// as the method parameter.  This method returns an empty
  /// array when there are no matching fields.  It never
  /// returns null.
  /// 
  /// </summary>
  /// <param name="name">the name of the field
  /// </param>
  /// <returns> a <code>byte[][]</code> of binary field values
  /// </returns>
  public byte[][] GetBinaryValues(System.String name)
  {
      // Keep only the binary fields stored under this name.
      System.Collections.Generic.List<Fieldable> binaryFields = this.fields.GetListWithConstraint<Fieldable>(name,
          delegate(Fieldable candidate)
          {
              return candidate.IsBinary() ? candidate : null;
          });

      if (binaryFields.Count == 0)
      {
          return NO_BYTES;
      }

      // One slot per binary field, in field order.
      byte[][] values = new byte[binaryFields.Count][];
      for (int i = 0; i < values.Length; i++)
      {
          values[i] = binaryFields[i].BinaryValue();
      }
      return values;
  }
  
  /// <summary> Returns an array of bytes for the first (or only) field that has the name
  /// specified as the method parameter. This method will return <code>null</code>
  /// if no binary fields with the specified name are available.
  /// There may be non-binary fields with the same name.
  /// 
  /// </summary>
  /// <param name="name">the name of the field.
  /// </param>
  /// <returns> a <code>byte[]</code> containing the binary field value or <code>null</code>
  /// </returns>
  public byte[] GetBinaryValue(System.String name)
  {
      return this.fields.GetFirstWithConstraint<byte[]>(name,
          delegate(Fieldable candidate)
          {
              // Non-binary fields are skipped by mapping them to null.
              return candidate.IsBinary() ? candidate.BinaryValue() : null;
          });
  }
  
  /// <summary>Prints the fields of a document for human consumption.</summary>
  /// <returns> <code>Document&lt;...&gt;</code> with the string form of every field,
  /// separated by single spaces
  /// </returns>
  public override System.String ToString()
  {
      System.Collections.Generic.List<Fieldable> fieldables = this.fields.GetAllLists();

      System.Text.StringBuilder buffer = new System.Text.StringBuilder();
      buffer.Append("Document<");
      // Bound the loop by the flattened list: fields.Count is only the number of
      // distinct field names and would drop fields that share a name.
      for (int i = 0; i < fieldables.Count; i++)
      {
          if (i > 0)
          {
              buffer.Append(" ");
          }
          buffer.Append(fieldables[i].ToString());
      }
      buffer.Append(">");
      return buffer.ToString();
  }

        /// <summary>Exposes a flattened list of all fields of this document; the name
        /// suggests it exists for the NUnit tests.</summary>
        public System.Collections.IList fields_ForNUnit
        {
            get
            {
                System.Collections.IList flattened = this.fields.GetAllLists();
                return flattened;
            }
        }
 }
}


Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message