Hi,
I would like to share with you some improvements in Document class.
I changed the field storage from ArrayList to Dictionary<string, List<Fieldable>>.
It avoids box/unbox, and it is better for iterating by field name.
I tested it and the functionality is the same as before, but with better
performance.
I would appreciate some feedback.
I do not know whether this list accepts attachments, so I am sending the file
both as an attachment and in the mail body (below).
Regards,
Leo
######### Document.cs ###################
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
// for javadoc
using IndexReader = Lucene.Net.Index.IndexReader;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using Searcher = Lucene.Net.Search.Searcher;
namespace Lucene.Net.Documents
{
/// <summary>Documents are the unit of indexing and search.
///
/// A Document is a set of fields. Each field has a name and a textual value.
/// A field may be <see cref="Fieldable.IsStored()">stored</see> with the document, in which
/// case it is returned with search hits on the document. Thus each document
/// should typically contain one or more stored fields which uniquely identify
/// it.
///
/// <p/>Note that fields which are <i>not</i> <see cref="Fieldable.IsStored()">stored</see> are
/// <i>not</i> available in documents retrieved from the index, e.g. with
/// <see cref="ScoreDoc.doc"/>, <see cref="Searcher.Doc(int)"/> or
/// <see cref="IndexReader.Document(int)"/>.
/// </summary>
[Serializable]
public sealed class Document
{
    /// <summary>
    /// Legacy enumerator handed out by <see cref="Document.Fields()"/>.
    /// It walks a snapshot of the document's fields taken at construction time,
    /// so every element it yields is a <see cref="Fieldable"/>.
    /// </summary>
    private class AnonymousClassEnumeration : System.Collections.IEnumerator
    {
        public AnonymousClassEnumeration(Document enclosingInstance)
        {
            InitBlock(enclosingInstance);
        }

        private void InitBlock(Document enclosingInstance)
        {
            this.enclosingInstance = enclosingInstance;
            // Enumerate the flattened field list rather than the dictionary:
            // the dictionary's own enumerator yields KeyValuePair entries,
            // not the Fieldable instances that callers of Fields() expect.
            iter = Enclosing_Instance.fields.GetAllLists().GetEnumerator();
        }

        // Element most recently produced by MoveNext(); exposed through Current.
        private System.Object tempAuxObj;

        /// <summary>Advances to the next field; returns false once the snapshot is exhausted.</summary>
        public bool MoveNext()
        {
            bool result = HasMoreElements();
            if (result)
            {
                tempAuxObj = NextElement();
            }
            return result;
        }

        /// <summary>Rewinds the enumerator to the position before the first field.</summary>
        public void Reset()
        {
            // Rewind the underlying enumerator as well; clearing only the cached
            // element would leave the position unchanged.
            iter.Reset();
            tempAuxObj = null;
        }

        /// <summary>The field produced by the last successful <see cref="MoveNext()"/> call.</summary>
        public System.Object Current
        {
            get
            {
                return tempAuxObj;
            }
        }

        private Document enclosingInstance;

        /// <summary>The document this enumerator was created for.</summary>
        public Document Enclosing_Instance
        {
            get
            {
                return enclosingInstance;
            }
        }

        internal System.Collections.IEnumerator iter;

        /// <summary>Advances the underlying enumerator and reports whether an element is available.</summary>
        public bool HasMoreElements()
        {
            return iter.MoveNext();
        }

        /// <summary>Returns the element the underlying enumerator is positioned on.</summary>
        public System.Object NextElement()
        {
            return iter.Current;
        }
    }

    /// <summary>
    /// Field storage keyed by field name. Each key maps to the list of fields
    /// added under that name, in the order they were added. A key is never left
    /// mapped to an empty list by the methods of this class.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the relative order of fields with <i>different</i> names, as
    /// returned by <see cref="GetAllLists()"/>, follows the dictionary's enumeration
    /// order, which the framework documents as unspecified. Confirm that no caller
    /// relies on global insertion order.
    /// </remarks>
    [Serializable]
    internal class FieldControl :
        System.Collections.Generic.Dictionary<string, System.Collections.Generic.List<Fieldable>>
    {
        /// <summary>Creates an empty field store.</summary>
        public FieldControl()
        {
        }

        /// <summary>
        /// Deserialization constructor. The dictionary base class serializes itself
        /// through ISerializable, so a derived class needs this constructor (and the
        /// Serializable attribute) for the enclosing document to stay serializable.
        /// </summary>
        protected FieldControl(System.Runtime.Serialization.SerializationInfo info,
            System.Runtime.Serialization.StreamingContext context)
            : base(info, context)
        {
        }

        /// <summary>Appends a field to the list stored under its name.</summary>
        /// <param name="item">the field to add; must not be null</param>
        /// <exception cref="ArgumentNullException">if <paramref name="item"/> is null</exception>
        public void Add(Fieldable item)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            string key = item.Name();
            System.Collections.Generic.List<Fieldable> sameName;
            if (!TryGetValue(key, out sameName))
            {
                sameName = new System.Collections.Generic.List<Fieldable>();
                base.Add(key, sameName);
            }
            sameName.Add(item);
        }

        /// <summary>
        /// Removes the first field that was added under the given name.
        /// Does nothing when the name is null or unknown.
        /// </summary>
        /// <param name="name">the name of the field</param>
        public void RemoveOne(string name)
        {
            System.Collections.Generic.List<Fieldable> sameName;
            if (name == null || !TryGetValue(name, out sameName))
            {
                return;
            }

            if (sameName.Count > 0)
            {
                sameName.RemoveAt(0);
            }

            // Drop the key once its list is empty, so later lookups see the
            // name as absent instead of indexing into an empty list.
            if (sameName.Count == 0)
            {
                base.Remove(name);
            }
        }

        /// <summary>
        /// Removes every field stored under the given name.
        /// Does nothing when the name is null or unknown.
        /// </summary>
        /// <param name="name">the name of the field</param>
        public void RemoveAll(string name)
        {
            if (name != null)
            {
                base.Remove(name);
            }
        }

        /// <summary>
        /// Returns the fields stored under the given name, or a new empty list
        /// when the name is null or unknown. Never returns null.
        /// </summary>
        /// <param name="name">the name of the field</param>
        public System.Collections.Generic.List<Fieldable> GetList(string name)
        {
            System.Collections.Generic.List<Fieldable> sameName;
            if (name != null && TryGetValue(name, out sameName))
            {
                return sameName;
            }
            return new System.Collections.Generic.List<Fieldable>();
        }

        /// <summary>
        /// Returns the first field added under the given name, or null when
        /// there is none.
        /// </summary>
        /// <param name="name">the name of the field</param>
        public Fieldable GetFirst(string name)
        {
            System.Collections.Generic.List<Fieldable> sameName;
            if (name != null && TryGetValue(name, out sameName) && sameName.Count > 0)
            {
                return sameName[0];
            }
            return null;
        }

        /// <summary>
        /// Returns a new list holding every stored field, grouped by name.
        /// Changes to the returned list do not affect this store.
        /// </summary>
        public System.Collections.Generic.List<Fieldable> GetAllLists()
        {
            System.Collections.Generic.List<Fieldable> all =
                new System.Collections.Generic.List<Fieldable>();
            foreach (System.Collections.Generic.List<Fieldable> sameName in Values)
            {
                all.AddRange(sameName);
            }
            return all;
        }

        /// <summary>
        /// Applies <paramref name="constraint"/> to each field stored under the
        /// given name and collects the non-null results, in order.
        /// </summary>
        /// <param name="name">the name of the field</param>
        /// <param name="constraint">maps a field to a result, or to null to skip it</param>
        /// <returns>a new list, empty when nothing matched; never null</returns>
        public System.Collections.Generic.List<T> GetListWithConstraint<T>(string name,
            Func<T> constraint) where T : class
        {
            System.Collections.Generic.List<Fieldable> candidates = GetList(name);
            System.Collections.Generic.List<T> accepted =
                new System.Collections.Generic.List<T>(candidates.Count);
            foreach (Fieldable candidate in candidates)
            {
                T projected = constraint(candidate);
                if (projected != null)
                {
                    accepted.Add(projected);
                }
            }
            return accepted;
        }

        /// <summary>
        /// Applies <paramref name="constraint"/> to each field stored under the
        /// given name and returns the first non-null result, or null when there
        /// is none.
        /// </summary>
        /// <param name="name">the name of the field</param>
        /// <param name="constraint">maps a field to a result, or to null to skip it</param>
        public T GetFirstWithConstraint<T>(string name, Func<T> constraint) where T : class
        {
            foreach (Fieldable candidate in GetList(name))
            {
                T projected = constraint(candidate);
                if (projected != null)
                {
                    return projected;
                }
            }
            return null;
        }

        /// <summary>Maps a field to a result of type T, or to null to reject the field.</summary>
        public delegate T Func<T>(Fieldable item) where T : class;
    }

    internal FieldControl fields = new FieldControl();
    private float boost = 1.0f;

    /// <summary>Constructs a new document with no fields. </summary>
    public Document()
    {
    }

    /// <summary>Sets a boost factor for hits on any field of this document. This value
    /// will be multiplied into the score of all hits on this document.
    ///
    /// <p/>The default value is 1.0.
    ///
    /// <p/>Values are multiplied into the value of <see cref="Fieldable.GetBoost()"/> of
    /// each field in this document. Thus, this method in effect sets a default
    /// boost for the fields of this document.
    ///
    /// </summary>
    /// <seealso cref="Fieldable.SetBoost(float)">
    /// </seealso>
    public void SetBoost(float boost)
    {
        this.boost = boost;
    }

    /// <summary>Returns, at indexing time, the boost factor as set by <see cref="SetBoost(float)"/>.
    ///
    /// <p/>Note that once a document is indexed this value is no longer available
    /// from the index. At search time, for retrieved documents, this method always
    /// returns 1. This however does not mean that the boost value set at indexing
    /// time was ignored - it was just combined with other indexing time factors and
    /// stored elsewhere, for better indexing and search performance. (For more
    /// information see the "norm(t,d)" part of the scoring formula in
    /// <see cref="Lucene.Net.Search.Similarity">Similarity</see>.)
    ///
    /// </summary>
    /// <seealso cref="SetBoost(float)">
    /// </seealso>
    public float GetBoost()
    {
        return boost;
    }

    /// <summary> <p/>Adds a field to a document. Several fields may be added with
    /// the same name. In this case, if the fields are indexed, their text is
    /// treated as though appended for the purposes of search.<p/>
    /// <p/> Note that add like the removeField(s) methods only makes sense
    /// prior to adding a document to an index. These methods cannot
    /// be used to change the content of an existing index! In order to achieve this,
    /// a document has to be deleted from an index and a new changed version of that
    /// document has to be added.<p/>
    /// </summary>
    /// <exception cref="ArgumentNullException">if <paramref name="field"/> is null</exception>
    public void Add(Fieldable field)
    {
        fields.Add(field);
    }

    /// <summary> <p/>Removes field with the specified name from the document.
    /// If multiple fields exist with this name, this method removes the first field that has been added.
    /// If there is no field with the specified name, the document remains unchanged.<p/>
    /// <p/> Note that the removeField(s) methods like the add method only make sense
    /// prior to adding a document to an index. These methods cannot
    /// be used to change the content of an existing index! In order to achieve this,
    /// a document has to be deleted from an index and a new changed version of that
    /// document has to be added.<p/>
    /// </summary>
    public void RemoveField(System.String name)
    {
        fields.RemoveOne(name);
    }

    /// <summary> <p/>Removes all fields with the given name from the document.
    /// If there is no field with the specified name, the document remains unchanged.<p/>
    /// <p/> Note that the removeField(s) methods like the add method only make sense
    /// prior to adding a document to an index. These methods cannot
    /// be used to change the content of an existing index! In order to achieve this,
    /// a document has to be deleted from an index and a new changed version of that
    /// document has to be added.<p/>
    /// </summary>
    public void RemoveFields(System.String name)
    {
        fields.RemoveAll(name);
    }

    /// <summary>Returns a field with the given name if any exist in this document, or
    /// null. If multiple fields exists with this name, this method returns the
    /// first value added.
    /// Do not use this method with lazy loaded fields.
    /// </summary>
    public Field GetField(System.String name)
    {
        // Yields null when the first fieldable under this name is not a Field.
        return fields.GetFirst(name) as Field;
    }

    /// <summary>Returns a field with the given name if any exist in this document, or
    /// null. If multiple fields exists with this name, this method returns the
    /// first value added.
    /// </summary>
    public Fieldable GetFieldable(System.String name)
    {
        return fields.GetFirst(name);
    }

    /// <summary>Returns the string value of the field with the given name if any exist in
    /// this document, or null. If multiple fields exist with this name, this
    /// method returns the first value added. If only binary fields with this name
    /// exist, returns null.
    /// </summary>
    public System.String Get(System.String name)
    {
        return fields.GetFirstWithConstraint<string>(name,
            delegate(Fieldable fieldable)
            {
                return fieldable.IsBinary() ? null : fieldable.StringValue();
            });
    }

    /// <summary>Returns an Enumeration of all the fields in a document.</summary>
    /// <deprecated> use <see cref="GetFields()"/> instead
    /// </deprecated>
    [Obsolete("Use GetFields() instead")]
    public System.Collections.IEnumerator Fields()
    {
        return new AnonymousClassEnumeration(this);
    }

    /// <summary>Returns a List of all the fields in a document.
    /// <p/>Note that fields which are <i>not</i> <see cref="Fieldable.IsStored()">stored</see> are
    /// <i>not</i> available in documents retrieved from the
    /// index, e.g. <see cref="Searcher.Doc(int)"/> or
    /// <see cref="IndexReader.Document(int)"/>.
    /// </summary>
    public System.Collections.IList GetFields()
    {
        // The list is a fresh copy; modifying it does not change the document.
        return fields.GetAllLists();
    }

    /// <summary> Returns an array of <see cref="Field"/>s with the given name.
    /// Do not use with lazy loaded fields.
    /// This method returns an empty array when there are no
    /// matching fields. It never returns null.
    ///
    /// </summary>
    /// <param name="name">the name of the field
    /// </param>
    /// <returns> a <code>Field[]</code> array
    /// </returns>
    public Field[] GetFields(System.String name)
    {
        // Fieldables that are not Field instances are skipped, so the
        // returned array never contains null entries.
        return fields.GetListWithConstraint<Field>(name,
            delegate(Fieldable fieldable)
            {
                return fieldable as Field;
            }).ToArray();
    }

    /// <summary> Returns an array of <see cref="Fieldable"/>s with the given name.
    /// This method returns an empty array when there are no
    /// matching fields. It never returns null.
    ///
    /// </summary>
    /// <param name="name">the name of the field
    /// </param>
    /// <returns> a <code>Fieldable[]</code> array
    /// </returns>
    public Fieldable[] GetFieldables(System.String name)
    {
        return fields.GetList(name).ToArray();
    }

    /// <summary> Returns an array of values of the field specified as the method parameter.
    /// This method returns an empty array when there are no
    /// matching fields. It never returns null.
    /// </summary>
    /// <param name="name">the name of the field
    /// </param>
    /// <returns> a <code>String[]</code> of field values
    /// </returns>
    public System.String[] GetValues(System.String name)
    {
        return fields.GetListWithConstraint<string>(name,
            delegate(Fieldable fieldable)
            {
                return fieldable.IsBinary() ? null : fieldable.StringValue();
            }).ToArray();
    }

    // Shared result for GetBinaryValues when no binary field matches.
    private static readonly byte[][] NO_BYTES = new byte[0][];

    /// <summary> Returns an array of byte arrays for of the fields that have the name specified
    /// as the method parameter. This method returns an empty
    /// array when there are no matching fields. It never
    /// returns null.
    ///
    /// </summary>
    /// <param name="name">the name of the field
    /// </param>
    /// <returns> a <code>byte[][]</code> of binary field values
    /// </returns>
    public byte[][] GetBinaryValues(System.String name)
    {
        System.Collections.Generic.List<Fieldable> binaryFields =
            fields.GetListWithConstraint<Fieldable>(name,
                delegate(Fieldable fieldable)
                {
                    return fieldable.IsBinary() ? fieldable : null;
                });

        if (binaryFields.Count == 0)
        {
            return NO_BYTES;
        }

        byte[][] values = new byte[binaryFields.Count][];
        for (int i = 0; i < values.Length; i++)
        {
            values[i] = binaryFields[i].BinaryValue();
        }
        return values;
    }

    /// <summary> Returns an array of bytes for the first (or only) field that has the name
    /// specified as the method parameter. This method will return <code>null</code>
    /// if no binary fields with the specified name are available.
    /// There may be non-binary fields with the same name.
    ///
    /// </summary>
    /// <param name="name">the name of the field.
    /// </param>
    /// <returns> a <code>byte[]</code> containing the binary field value or <code>null</code>
    /// </returns>
    public byte[] GetBinaryValue(System.String name)
    {
        return fields.GetFirstWithConstraint<byte[]>(name,
            delegate(Fieldable fieldable)
            {
                return fieldable.IsBinary() ? fieldable.BinaryValue() : null;
            });
    }

    /// <summary>Prints the fields of a document for human consumption. </summary>
    public override System.String ToString()
    {
        System.Collections.Generic.List<Fieldable> fieldables = fields.GetAllLists();
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        buffer.Append("Document<");
        // Bound the loop by the flattened list: fields.Count is the number of
        // distinct names, which is smaller whenever a name is used more than once.
        for (int i = 0; i < fieldables.Count; i++)
        {
            if (i > 0)
            {
                buffer.Append(" ");
            }
            buffer.Append(fieldables[i].ToString());
        }
        buffer.Append(">");
        return buffer.ToString();
    }

    /// <summary>Test hook: a copy of all fields currently held by this document.</summary>
    public System.Collections.IList fields_ForNUnit
    {
        get { return fields.GetAllLists(); }
    }
}
}
|