lucene-lucene-net-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Leonardo Azize Martins <laz...@gmail.com>
Subject Document class improvements
Date Mon, 03 May 2010 17:55:49 GMT
Hi,

I would like to share with you some improvements to the Document class.

I changed the field storage from ArrayList to Dictionary<string, List<Fieldable>>.
It avoids boxing/unboxing, and it is better for looking up fields by name.

I tested it, and the functionality is the same as before, but with better
performance.
I would appreciate some feedback.

I do not know if this list accepts attachments, so I am sending the file both
as an attachment and in the mail body (below).

Regards,
Leo

######### Document.cs ###################
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using System.Collections.Generic;
// for javadoc
using IndexReader = Lucene.Net.Index.IndexReader;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using Searcher = Lucene.Net.Search.Searcher;
namespace Lucene.Net.Documents
{

 /// <summary>Documents are the unit of indexing and search.
 ///
 /// A Document is a set of fields.  Each field has a name and a textual value.
 /// A field may be {@link Fieldable#IsStored() stored} with the document, in which
 /// case it is returned with search hits on the document.  Thus each document
 /// should typically contain one or more stored fields which uniquely identify
 /// it.
 ///
 /// <p/>Note that fields which are <i>not</i> {@link Fieldable#IsStored() stored} are
 /// <i>not</i> available in documents retrieved from the index, e.g. with
 /// {@link ScoreDoc#doc}, {@link Searcher#Doc(int)} or
 /// {@link IndexReader#Document(int)}.
 /// </summary>

 [Serializable]
 public sealed class Document
 {
     /// <summary>Enumerator handed out by the obsolete <c>Fields()</c> method.
     /// It walks a flattened snapshot of the document's fields, so
     /// <c>Current</c> is always a <c>Fieldable</c>.
     /// </summary>
     private class AnonymousClassEnumeration : System.Collections.IEnumerator
     {
         public AnonymousClassEnumeration(Document enclosingInstance)
         {
             InitBlock(enclosingInstance);
         }

         private void InitBlock(Document enclosingInstance)
         {
             this.enclosingInstance = enclosingInstance;
             iter = CreateIterator();
         }

         /// <summary>Builds an enumerator over the flattened field list.
         /// Enumerating <c>fields</c> directly would yield the dictionary's
         /// key/value entries instead of the individual fields.
         /// </summary>
         private System.Collections.IEnumerator CreateIterator()
         {
             return Enclosing_Instance.fields.GetAllLists().GetEnumerator();
         }

         private System.Object tempAuxObj;

         /// <summary>Advances to the next field, caching it for <c>Current</c>.</summary>
         public bool MoveNext()
         {
             bool result = HasMoreElements();
             if (result)
             {
                 tempAuxObj = NextElement();
             }
             return result;
         }

         /// <summary>Rewinds the enumeration to before the first field.</summary>
         public void Reset()
         {
             tempAuxObj = null;
             // Take a fresh snapshot so the next MoveNext() starts from the first field again.
             iter = CreateIterator();
         }

         public System.Object Current
         {
             get
             {
                 return tempAuxObj;
             }
         }

         private Document enclosingInstance;

         public Document Enclosing_Instance
         {
             get
             {
                 return enclosingInstance;
             }
         }

         internal System.Collections.IEnumerator iter;

         public bool HasMoreElements()
         {
             return iter.MoveNext();
         }

         public System.Object NextElement()
         {
             return iter.Current;
         }
     }

     /// <summary>Field storage: maps a field name to the list of fields added
     /// under that name, in the order they were added.  Fields are therefore
     /// grouped by name; the order of the groups is whatever order the
     /// underlying dictionary enumerates its keys in.
     /// </summary>
     [Serializable]
     internal class FieldControl : Dictionary<string, List<Fieldable>>
     {
         public FieldControl()
         {
         }

         /// <summary>Deserialization constructor.  The dictionary base class
         /// serializes itself through ISerializable, so a derived class must
         /// forward this constructor (and be marked [Serializable]) for the
         /// enclosing [Serializable] Document to round-trip.
         /// </summary>
         protected FieldControl(System.Runtime.Serialization.SerializationInfo info,
                                System.Runtime.Serialization.StreamingContext context)
             : base(info, context)
         {
         }

         /// <summary>Looks up the list stored for <c>name</c> with a single
         /// dictionary probe.  A null name is treated as "no such field"
         /// rather than being passed to the dictionary, which rejects null keys.
         /// </summary>
         private bool TryGetFields(string name, out List<Fieldable> list)
         {
             if (name == null)
             {
                 list = null;
                 return false;
             }
             return TryGetValue(name, out list);
         }

         /// <summary>Appends <c>item</c> to the list kept for its name,
         /// creating that list on first use.</summary>
         public void Add(Fieldable item)
         {
             string key = item.Name();
             List<Fieldable> list;
             if (!TryGetValue(key, out list))
             {
                 list = new List<Fieldable>();
                 base.Add(key, list);
             }
             list.Add(item);
         }

         /// <summary>Removes the first field added under <c>name</c>, if any.</summary>
         public void RemoveOne(string name)
         {
             List<Fieldable> list;
             if (!TryGetFields(name, out list))
             {
                 return;
             }
             if (list.Count > 0)
             {
                 list.RemoveAt(0);
             }
             // Never leave an empty list behind: GetFirst() and a repeated
             // RemoveOne() would otherwise index into an empty list.
             if (list.Count == 0)
             {
                 base.Remove(name);
             }
         }

         /// <summary>Removes every field stored under <c>name</c>, if any.</summary>
         public void RemoveAll(string name)
         {
             if (name != null)
             {
                 base.Remove(name);
             }
         }

         /// <summary>Returns the list of fields stored under <c>name</c>, or a
         /// new empty list when there is none.  Never returns null.</summary>
         public List<Fieldable> GetList(string name)
         {
             List<Fieldable> list;
             if (TryGetFields(name, out list))
             {
                 return list;
             }
             return new List<Fieldable>();
         }

         /// <summary>Returns the first field added under <c>name</c>, or null
         /// when there is none.</summary>
         public Fieldable GetFirst(string name)
         {
             List<Fieldable> list;
             if (TryGetFields(name, out list) && list.Count > 0)
             {
                 return list[0];
             }
             return null;
         }

         /// <summary>Returns a new list holding every field of every name,
         /// grouped by name.</summary>
         public List<Fieldable> GetAllLists()
         {
             List<Fieldable> fieldables = new List<Fieldable>();
             foreach (List<Fieldable> list in base.Values)
             {
                 fieldables.AddRange(list);
             }
             return fieldables;
         }

         /// <summary>Applies <c>constraint</c> to each field stored under
         /// <c>name</c> and collects the non-null results, in order.</summary>
         public List<T> GetListWithConstraint<T>(string name, Func<T> constraint) where T : class
         {
             List<Fieldable> fieldables = this.GetList(name);
             List<T> internalList = new List<T>(fieldables.Count);
             foreach (Fieldable item in fieldables)
             {
                 T instance = constraint(item);
                 if (instance != null)
                 {
                     internalList.Add(instance);
                 }
             }
             return internalList;
         }

         /// <summary>Applies <c>constraint</c> to each field stored under
         /// <c>name</c> and returns the first non-null result, or null when
         /// no field produces one.</summary>
         public T GetFirstWithConstraint<T>(string name, Func<T> constraint) where T : class
         {
             foreach (Fieldable item in this.GetList(name))
             {
                 T instance = constraint(item);
                 if (instance != null)
                 {
                     return instance;
                 }
             }
             return default(T);
         }

         /// <summary>Projection used by the *WithConstraint methods; returning
         /// null means "skip this field".</summary>
         public delegate T Func<T>(Fieldable item) where T : class;
     }

     // All fields of this document, keyed by field name.
     internal FieldControl fields = new FieldControl();
     private float boost = 1.0f;

     /// <summary>Constructs a new document with no fields. </summary>
     public Document()
     {
     }

     /// <summary>Returns the string value of a non-binary field, or null for a binary one.</summary>
     private static string StringValueOrNull(Fieldable fieldable)
     {
         return fieldable.IsBinary() ? null : fieldable.StringValue();
     }

     /// <summary>Returns the binary value of a binary field, or null for a non-binary one.</summary>
     private static byte[] BinaryValueOrNull(Fieldable fieldable)
     {
         return fieldable.IsBinary() ? fieldable.BinaryValue() : null;
     }

     /// <summary>Sets a boost factor for hits on any field of this document.  This value
     /// will be multiplied into the score of all hits on this document.
     ///
     /// <p/>The default value is 1.0.
     ///
     /// <p/>Values are multiplied into the value of {@link Fieldable#GetBoost()} of
     /// each field in this document.  Thus, this method in effect sets a default
     /// boost for the fields of this document.
     ///
     /// </summary>
     /// <seealso cref="Fieldable.SetBoost(float)">
     /// </seealso>
     public void SetBoost(float boost)
     {
         this.boost = boost;
     }

     /// <summary>Returns, at indexing time, the boost factor as set by {@link #SetBoost(float)}.
     ///
     /// <p/>Note that once a document is indexed this value is no longer available
     /// from the index.  At search time, for retrieved documents, this method always
     /// returns 1. This however does not mean that the boost value set at indexing
     /// time was ignored - it was just combined with other indexing time factors and
     /// stored elsewhere, for better indexing and search performance. (For more
     /// information see the "norm(t,d)" part of the scoring formula in
     /// {@link Lucene.Net.Search.Similarity Similarity}.)
     ///
     /// </summary>
     /// <seealso cref="SetBoost(float)">
     /// </seealso>
     public float GetBoost()
     {
         return boost;
     }

     /// <summary> <p/>Adds a field to a document.  Several fields may be added with
     /// the same name.  In this case, if the fields are indexed, their text is
     /// treated as though appended for the purposes of search.<p/>
     /// <p/> Note that add like the removeField(s) methods only makes sense
     /// prior to adding a document to an index. These methods cannot
     /// be used to change the content of an existing index! In order to achieve this,
     /// a document has to be deleted from an index and a new changed version of that
     /// document has to be added.<p/>
     /// </summary>
     public void Add(Fieldable field)
     {
         fields.Add(field);
     }

     /// <summary> <p/>Removes field with the specified name from the document.
     /// If multiple fields exist with this name, this method removes the first field that has been added.
     /// If there is no field with the specified name, the document remains unchanged.<p/>
     /// <p/> Note that the removeField(s) methods like the add method only make sense
     /// prior to adding a document to an index. These methods cannot
     /// be used to change the content of an existing index! In order to achieve this,
     /// a document has to be deleted from an index and a new changed version of that
     /// document has to be added.<p/>
     /// </summary>
     public void RemoveField(System.String name)
     {
         fields.RemoveOne(name);
     }

     /// <summary> <p/>Removes all fields with the given name from the document.
     /// If there is no field with the specified name, the document remains unchanged.<p/>
     /// <p/> Note that the removeField(s) methods like the add method only make sense
     /// prior to adding a document to an index. These methods cannot
     /// be used to change the content of an existing index! In order to achieve this,
     /// a document has to be deleted from an index and a new changed version of that
     /// document has to be added.<p/>
     /// </summary>
     public void RemoveFields(System.String name)
     {
         fields.RemoveAll(name);
     }

     /// <summary>Returns a field with the given name if any exist in this document, or
     /// null.  If multiple fields exists with this name, this method returns the
     /// first value added.
     /// Do not use this method with lazy loaded fields: if the first field is not
     /// a Field instance, null is returned.
     /// </summary>
     public Field GetField(System.String name)
     {
         return fields.GetFirst(name) as Field;
     }

     /// <summary>Returns a field with the given name if any exist in this document, or
     /// null.  If multiple fields exists with this name, this method returns the
     /// first value added.
     /// </summary>
     public Fieldable GetFieldable(System.String name)
     {
         return fields.GetFirst(name);
     }

     /// <summary>Returns the string value of the field with the given name if any exist in
     /// this document, or null.  If multiple fields exist with this name, this
     /// method returns the first value added. If only binary fields with this name
     /// exist, returns null.
     /// </summary>
     public System.String Get(System.String name)
     {
         return fields.GetFirstWithConstraint<string>(name, StringValueOrNull);
     }

     /// <summary>Returns an Enumeration of all the fields in a document.</summary>
     /// <deprecated> use {@link #GetFields()} instead
     /// </deprecated>
     [Obsolete("Use GetFields() instead")]
     public System.Collections.IEnumerator Fields()
     {
         return new AnonymousClassEnumeration(this);
     }

     /// <summary>Returns a List of all the fields in a document.
     /// <p/>Note that fields which are <i>not</i> {@link Fieldable#IsStored() stored} are
     /// <i>not</i> available in documents retrieved from the
     /// index, e.g. {@link Searcher#Doc(int)} or
     /// {@link IndexReader#Document(int)}.
     /// </summary>
     public System.Collections.IList GetFields()
     {
         return fields.GetAllLists();
     }

     /// <summary> Returns an array of {@link Field}s with the given name.
     /// Do not use with lazy loaded fields: a field that is not a Field
     /// instance produces a null entry in the result.
     /// This method returns an empty array when there are no
     /// matching fields.  It never returns null.
     ///
     /// </summary>
     /// <param name="name">the name of the field
     /// </param>
     /// <returns> a <code>Field[]</code> array
     /// </returns>
     public Field[] GetFields(System.String name)
     {
         List<Fieldable> fieldables = fields.GetList(name);
         Field[] result = new Field[fieldables.Count];
         for (int i = 0; i < result.Length; i++)
         {
             result[i] = fieldables[i] as Field;
         }
         return result;
     }

     /// <summary> Returns an array of {@link Fieldable}s with the given name.
     /// This method returns an empty array when there are no
     /// matching fields.  It never returns null.
     ///
     /// </summary>
     /// <param name="name">the name of the field
     /// </param>
     /// <returns> a <code>Fieldable[]</code> array
     /// </returns>
     public Fieldable[] GetFieldables(System.String name)
     {
         return fields.GetList(name).ToArray();
     }

     /// <summary> Returns an array of values of the field specified as the method parameter.
     /// Binary fields are skipped.
     /// This method returns an empty array when there are no
     /// matching fields.  It never returns null.
     /// </summary>
     /// <param name="name">the name of the field
     /// </param>
     /// <returns> a <code>String[]</code> of field values
     /// </returns>
     public System.String[] GetValues(System.String name)
     {
         return fields.GetListWithConstraint<string>(name, StringValueOrNull).ToArray();
     }

     private static readonly byte[][] NO_BYTES = new byte[0][];

     /// <summary> Returns an array of byte arrays for the fields that have the name specified
     /// as the method parameter.  This method returns an empty
     /// array when there are no matching fields.  It never
     /// returns null.
     ///
     /// </summary>
     /// <param name="name">the name of the field
     /// </param>
     /// <returns> a <code>byte[][]</code> of binary field values
     /// </returns>
     public byte[][] GetBinaryValues(System.String name)
     {
         List<byte[]> values = new List<byte[]>();
         foreach (Fieldable field in fields.GetList(name))
         {
             if (field.IsBinary())
             {
                 values.Add(field.BinaryValue());
             }
         }

         if (values.Count == 0)
         {
             return NO_BYTES;
         }
         return values.ToArray();
     }

     /// <summary> Returns an array of bytes for the first (or only) field that has the name
     /// specified as the method parameter. This method will return <code>null</code>
     /// if no binary fields with the specified name are available.
     /// There may be non-binary fields with the same name.
     ///
     /// </summary>
     /// <param name="name">the name of the field.
     /// </param>
     /// <returns> a <code>byte[]</code> containing the binary field value or <code>null</code>
     /// </returns>
     public byte[] GetBinaryValue(System.String name)
     {
         return fields.GetFirstWithConstraint<byte[]>(name, BinaryValueOrNull);
     }

     /// <summary>Prints the fields of a document for human consumption. </summary>
     public override System.String ToString()
     {
         List<Fieldable> fieldables = fields.GetAllLists();
         System.Text.StringBuilder buffer = new System.Text.StringBuilder();
         buffer.Append("Document<");
         // Iterate over the flattened list itself: fields.Count is the number of
         // distinct names, which differs as soon as two fields share a name.
         for (int i = 0; i < fieldables.Count; i++)
         {
             if (i > 0)
             {
                 buffer.Append(" ");
             }
             buffer.Append(fieldables[i].ToString());
         }
         buffer.Append(">");
         return buffer.ToString();
     }

     /// <summary>Exposes a flattened copy of the field list for unit tests.</summary>
     public System.Collections.IList fields_ForNUnit
     {
         get { return fields.GetAllLists(); }
     }
 }
}

Mime
  • Unnamed multipart/mixed (inline, None, 0 bytes)
View raw message