diff --git a/lib/lucene.net/LICENSE.txt b/lib/lucene.net/LICENSE.txt new file mode 100644 index 000000000..a97ea565b --- /dev/null +++ b/lib/lucene.net/LICENSE.txt @@ -0,0 +1,239 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + +Some code in src/java/org/apache/lucene/util/UnicodeUtil.java was +derived from unicode conversion examples available at +http://www.unicode.org/Public/PROGRAMS/CVTUTF. Here is the copyright +from those sources: + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + + +Some code in src/java/org/apache/lucene/util/ArrayUtil.java was +derived from Python 2.4.2 sources available at +http://www.python.org. Full license is here: + + http://www.python.org/download/releases/2.4.2/license/ + + diff --git a/lib/lucene.net/Lucene.Net.xml b/lib/lucene.net/Lucene.Net.xml new file mode 100644 index 000000000..a1b6d6d60 --- /dev/null +++ b/lib/lucene.net/Lucene.Net.xml @@ -0,0 +1,27275 @@ + + + + Lucene.Net + + + + Subclasses of StringInterner are required to + return the same single String object for all equal strings. + Depending on the implementation, this may not be + the same object returned as String.intern(). + + This StringInterner base class simply delegates to String.intern(). + + + + Returns a single object instance for each equal string. + + + Returns a single object instance for each equal string. + + + Floating point numbers smaller than 32 bits. + + + $Id$ + + + + Converts a 32 bit float to an 8 bit float. +
Values less than zero are all mapped to zero. +
Values are truncated (rounded down) to the nearest 8 bit value. +
Values between zero and the smallest representable value + are rounded up. + +
+ the 32 bit float to be converted to an 8 bit float (byte) + + the number of mantissa bits to use in the byte, with the remainder to be used in the exponent + + the zero-point in the range of exponent values + + the 8 bit float representation + +
+ + Converts an 8 bit float to a 32 bit float. + + + floatToByte(b, mantissaBits=3, zeroExponent=15) +
smallest non-zero value = 5.820766E-10 +
largest value = 7.5161928E9 +
epsilon = 0.125 +
+
+ + byteToFloat(b, mantissaBits=3, zeroExponent=15) + + + floatToByte(b, mantissaBits=5, zeroExponent=2) +
smallest nonzero value = 0.033203125 +
largest value = 1984.0 +
epsilon = 0.03125 +
+
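A small, self-contained sketch of the conversion described above (illustrative only, not the library's exact code): the byte keeps a truncated mantissa and a re-biased exponent, negative inputs collapse to zero, underflow rounds up to the smallest nonzero byte, and overflow saturates at the largest byte. The main method reproduces the documented extremes for (mantissaBits=3, zeroExponent=15) and (mantissaBits=5, zeroExponent=2).

    public class SmallFloatSketch {
        // Pack a 32 bit float into 8 bits: keep `mantissaBits` of mantissa,
        // re-bias the exponent around `zeroExp`, and clamp everything else.
        static byte floatToByte(float f, int mantissaBits, int zeroExp) {
            int fzero = (63 - zeroExp) << mantissaBits;       // byte-space position of the zero exponent
            int bits = Float.floatToRawIntBits(f);
            int smallfloat = bits >> (24 - mantissaBits);     // drop the low mantissa bits (truncate)
            if (smallfloat <= fzero) {
                return (bits <= 0) ? (byte) 0                 // negatives and zero map to 0
                                   : (byte) 1;                // underflow rounds up to the smallest nonzero value
            } else if (smallfloat >= fzero + 0x100) {
                return (byte) -1;                             // overflow saturates at the largest value
            } else {
                return (byte) (smallfloat - fzero);           // truncation, i.e. rounded down
            }
        }

        // Inverse mapping: rebuild a float from the 8 bit representation.
        static float byteToFloat(byte b, int mantissaBits, int zeroExp) {
            if (b == 0) return 0.0f;
            int bits = (b & 0xff) << (24 - mantissaBits);
            bits += (63 - zeroExp) << 24;
            return Float.intBitsToFloat(bits);
        }

        public static void main(String[] args) {
            System.out.println(byteToFloat((byte) 1, 3, 15));   // 5.820766E-10 (smallest nonzero value)
            System.out.println(byteToFloat((byte) -1, 3, 15));  // 7.5161928E9  (largest value)
            System.out.println(byteToFloat((byte) 1, 5, 2));    // 0.033203125
            System.out.println(byteToFloat((byte) -1, 5, 2));   // 1984.0
        }
    }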
+ + byteToFloat(b, mantissaBits=5, zeroExponent=2) + + + + This interface should be implemented by any class whose instances are intended + to be executed by a thread. + + + + + This method has to be implemented in order that starting of the thread causes the object's + run method to be called in that separately executing thread. + + + + + Contains conversion support elements such as classes, interfaces and static methods. + + + + + Copies an array of chars obtained from a String into a specified array of chars + + The String to get the chars from + Position of the String to start getting the chars + Position of the String to end getting the chars + Array to return the chars + Position of the destination array of chars to start storing the chars + An array of chars + + + + Support class used to handle threads + + + + + The instance of System.Threading.Thread + + + + + Initializes a new instance of the ThreadClass class + + + + + Initializes a new instance of the Thread class. + + The name of the thread + + + + Initializes a new instance of the Thread class. + + A ThreadStart delegate that references the methods to be invoked when this thread begins executing + + + + Initializes a new instance of the Thread class. + + A ThreadStart delegate that references the methods to be invoked when this thread begins executing + The name of the thread + + + + This method has no functionality unless the method is overridden + + + + + Causes the operating system to change the state of the current thread instance to ThreadState.Running + + + + + Interrupts a thread that is in the WaitSleepJoin thread state + + + + + Blocks the calling thread until a thread terminates + + + + + Blocks the calling thread until a thread terminates or the specified time elapses + + Time of wait in milliseconds + + + + Blocks the calling thread until a thread terminates or the specified time elapses + + Time of wait in milliseconds + Time of wait in nanoseconds + + + + Resumes a thread that has been suspended + + + + + Raises a ThreadAbortException in the thread on which it is invoked, + to begin the process of terminating the thread. Calling this method + usually terminates the thread + + + + + Raises a ThreadAbortException in the thread on which it is invoked, + to begin the process of terminating the thread while also providing + exception information about the thread termination. + Calling this method usually terminates the thread. + + An object that contains application-specific information, such as state, which can be used by the thread being aborted + + + + Suspends the thread, if the thread is already suspended it has no effect + + + + + Obtain a String that represents the current object + + A String that represents the current object + + + + Gets the currently running thread + + The currently running thread + + + + Gets the current thread instance + + + + + Gets or sets the name of the thread + + + + + Gets or sets a value indicating the scheduling priority of a thread + + + + + Gets a value indicating the execution status of the current thread + + + + + Gets or sets a value indicating whether or not a thread is a background thread. + + + + + Represents the methods to support some operations over files. + + + + + Returns an array of abstract pathnames representing the files and directories of the specified path. + + The abstract pathname to list it childs. + An array of abstract pathnames childs of the path specified or null if the path is not a directory + + + + Returns a list of files in a give directory. 
+ + The full path name to the directory. + + An array containing the files. + + + + Flushes the specified file stream. Ensures that all buffered + data is actually written to the file system. + + The file stream. + + + + A simple class for number conversions. + + + + + Min radix value. + + + + + Max radix value. + + + + + Converts a number to System.String. + + + + + + + Converts a number to System.String. + + + + + + + Converts a number to System.String in the specified radix. + + A number to be converted. + A radix. + A System.String representation of the number in the specified redix. + + + + Parses a number in the specified radix. + + An input System.String. + A radix. + The parsed number in the specified radix. + + + + Performs an unsigned bitwise right shift with the specified number + + Number to operate on + Ammount of bits to shift + The resulting number from the shift operation + + + + Performs an unsigned bitwise right shift with the specified number + + Number to operate on + Ammount of bits to shift + The resulting number from the shift operation + + + + Returns the index of the first bit that is set to true that occurs + on or after the specified starting index. If no such bit exists + then -1 is returned. + + The BitArray object. + The index to start checking from (inclusive). + The index of the next set bit. + + + + Converts a System.String number to long. + + + + + + + Mimics Java's Character class. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This class provides supporting methods of java.util.BitSet + that are not present in System.Collections.BitArray. + + + + + Returns the next set bit at or after index, or -1 if no such bit exists. + + + the index of bit array at which to start checking + the next set bit or -1 + + + + Returns the next un-set bit at or after index, or -1 if no such bit exists. + + + the index of bit array at which to start checking + the next set bit or -1 + + + + Returns the number of bits set to true in this BitSet. + + The BitArray object. + The number of bits set to true in this BitSet. + + + + Summary description for TestSupportClass. + + + + + Compares two Term arrays for equality. + + First Term array to compare + Second Term array to compare + true if the Terms are equal in both arrays, false otherwise + + + + A Hashtable which holds weak references to its keys so they + can be collected during GC. + + + + + Serves as a simple "GC Monitor" that indicates whether cleanup is needed. + If collectableObject.IsAlive is false, GC has occurred and we should perform cleanup + + + + + Customize the hashtable lookup process by overriding KeyEquals. KeyEquals + will compare both WeakKey to WeakKey and WeakKey to real keys + + + + + Perform cleanup if GC occurred + + + + + Iterate over all keys and remove keys that were collected + + + + + Wrap each key with a WeakKey and add it to the hashtable + + + + + Create a temporary copy of the real keys and return that + + + + + A weak referene wrapper for the hashtable keys. Whenever a key\value pair + is added to the hashtable, the key is wrapped using a WeakKey. WeakKey saves the + value of the original object hashcode for fast comparison. 
+ + + + + A Dictionary enumerator which wraps the original hashtable enumerator + and performs 2 tasks: Extract the real key from a WeakKey and skip keys + that were already collected. + + + + + Support class used to handle Hashtable addition, which does a check + first to make sure the added item is unique in the hash. + + + + + Converts the specified collection to its string representation. + + The collection to convert to string. + A string representation of the specified collection. + + + + Compares two string arrays for equality. + + First string array list to compare + Second string array list to compare + true if the strings are equal in both arrays, false otherwise + + + + Sorts an IList collections + + The System.Collections.IList instance that will be sorted + The Comparator criteria, null to use natural comparator. + + + + Fills the array with an specific value from an specific index to an specific index. + + The array to be filled. + The first index to be filled. + The last index to be filled. + The value to fill the array with. + + + + Fills the array with an specific value. + + The array to be filled. + The value to fill the array with. + + + + Compares the entire members of one array whith the other one. + + The array to be compared. + The array to be compared with. + Returns true if the two specified arrays of Objects are equal + to one another. The two arrays are considered equal if both arrays + contain the same number of elements, and all corresponding pairs of + elements in the two arrays are equal. Two objects e1 and e2 are + considered equal if (e1==null ? e2==null : e1.equals(e2)). In other + words, the two arrays are equal if they contain the same elements in + the same order. Also, two array references are considered equal if + both are null. + + + A collection of which can be + looked up by instances of . + The type of the items contains in this + collection. + The type of the keys that can be used to look + up the items. + + + Creates a new instance of the + class. + The which will convert + instances of to + when the override of is called. + + + The which will convert + instances of to + when the override of is called. + + + Converts an item that is added to the collection to + a key. + The instance of + to convert into an instance of . + The instance of which is the + key for this item. + + + Determines if a key for an item exists in this + collection. + The instance of + to see if it exists in this collection. + True if the key exists in the collection, false otherwise. + + + Represents a strongly typed list of objects that can be accessed by index. + Provides methods to search, sort, and manipulate lists. Also provides functionality + to compare lists against each other through an implementations of + . + The type of elements in the list. + + + Initializes a new instance of the + class that is empty and has the + default initial capacity. + + + Initializes a new instance of the + class that contains elements copied from the specified collection and has + sufficient capacity to accommodate the number of elements copied. + The collection whose elements are copied to the new list. + + + Initializes a new instance of the + class that is empty and has the specified initial capacity. + The number of elements that the new list can initially store. + + + Adds a range of objects represented by the + implementation. + The + implementation to add to this list. + + + Compares the counts of two + implementations. 
+ This uses a trick in LINQ, sniffing types for implementations + of interfaces that might supply shortcuts when trying to make comparisons. + In this case, that is the and + interfaces, either of which can provide a count + which can be used in determining the equality of sequences (if they don't have + the same count, then they can't be equal). + The from the left hand side of the + comparison to check the count of. + The from the right hand side of the + comparison to check the count of. + Null if the result is indeterminate. This occurs when either + or doesn't implement or . + Otherwise, it will get the count from each and return true if they are equal, false otherwise. + + + Compares the contents of a + implementation to another one to determine equality. + Thinking of the implementation as + a string with any number of characters, the algorithm checks + each item in each list. If any item of the list is not equal (or + one list contains all the elements of another list), then that list + element is compared to the other list element to see which + list is greater. + The implementation + that is considered the left hand side. + The implementation + that is considered the right hand side. + True if the items are equal, false otherwise. + + + Compares this sequence to another + implementation, returning true if they are equal, false otherwise. + The other implementation + to compare against. + True if the sequence in + is the same as this one. + + + Compares this object for equality against other. + The other object to compare this object against. + True if this object and are equal, false + otherwise. + + + Gets the hash code for the list. + The hash code value. + + + Gets the hash code for the list. + The + implementation which will have all the contents hashed. + The hash code value. + + + Clones the . + This is a shallow clone. + A new shallow clone of this + . + + + + A simple wrapper to allow for the use of the GeneralKeyedCollection. The + wrapper is required as there can be several keys for an object depending + on how many interfaces it implements. + + + + + Provides platform infos. + + + + + Whether we run under a Unix platform. + + + + + Whether we run under a supported Windows platform. + + + + A memory-resident {@link Directory} implementation. Locking + implementation is by default the {@link SingleInstanceLockFactory} + but can be changed with {@link #setLockFactory}. + + + $Id: RAMDirectory.java 781333 2009-06-03 10:38:57Z mikemccand $ + + + + A Directory is a flat list of files. Files may be written once, when they + are created. Once a file is created it may only be opened for read, or + deleted. Random access is permitted both when reading and writing. + +

Java's i/o APIs not used directly, but rather all i/o is + through this API. This permits things such as:

    +
  • implementation of RAM-based indices;
  • implementation of indices stored in a database, via JDBC;
  • implementation of an index as a single file;
+ + Directory locking is implemented by an instance of {@link + LockFactory}, and can be changed for each Directory + instance using {@link #setLockFactory}. + +
+
+ + Holds the LockFactory instance (implements locking for + this Directory instance). + + + + For some Directory implementations ({@link + FSDirectory}, and its subclasses), this method + silently filters its results to include only index + files. Please use {@link #listAll} instead, which + does no filtering. + + + + Returns an array of strings, one for each file in the + directory. Unlike {@link #list} this method does no + filtering of the contents in a directory, and it will + never return null (throws IOException instead). + + Currently this method simply fallsback to {@link + #list} for Directory impls outside of Lucene's core & + contrib, but in 3.0 that method will be removed and + this method will become abstract. + + + + Returns true iff a file with the given name exists. + + + Returns the time the named file was last modified. + + + Set the modified time of an existing file to now. + + + Removes an existing file in the directory. + + + Renames an existing file in the directory. + If a file already exists with the new name, then it is replaced. + This replacement is not guaranteed to be atomic. + + + + + + Returns the length of a file in the directory. + + + Creates a new, empty file in the directory with the given name. + Returns a stream writing this file. + + + + Ensure that any writes to this file are moved to + stable storage. Lucene uses this to properly commit + changes to the index, to prevent a machine/OS crash + from corrupting the index. + + + + Returns a stream reading an existing file. + + + Returns a stream reading an existing file, with the + specified read buffer size. The particular Directory + implementation may ignore the buffer size. Currently + the only Directory implementations that respect this + parameter are {@link FSDirectory} and {@link + Lucene.Net.Index.CompoundFileReader}. + + + + Construct a {@link Lock}. + the name of the lock file + + + + Attempt to clear (forcefully unlock and remove) the + specified lock. Only call this at a time when you are + certain this lock is no longer in use. + + name of the lock to be cleared. + + + + Closes the store. + + + Set the LockFactory that this Directory instance should + use for its locking implementation. Each * instance of + LockFactory should only be used for one directory (ie, + do not share a single instance across multiple + Directories). + + + instance of {@link LockFactory}. + + + + Get the LockFactory that this Directory instance is + using for its locking implementation. Note that this + may be null for Directory implementations that provide + their own locking implementation. + + + + Return a string identifier that uniquely differentiates + this Directory instance from other Directory instances. + This ID should be the same if two Directory instances + (even in different JVMs and/or on different machines) + are considered "the same index". This is how locking + "scopes" to the right index. + + + + Copy contents of a directory src to a directory dest. + If a file in src already exists in dest then the + one in dest will be blindly overwritten. + +

NOTE: the source directory cannot change + while this method is running. Otherwise the results + are undefined and you could easily hit a + FileNotFoundException. + +

NOTE: this method only copies files that look + like index files (ie, have extensions matching the + known extensions of index files). + +

+ source directory + + destination directory + + if true, call {@link #Close()} method on source directory + + IOException +
+ + AlreadyClosedException if this Directory is closed + + + Constructs an empty {@link Directory}. + + + Creates a new RAMDirectory instance from a different + Directory implementation. This can be used to load + a disk-based index into memory. +

+ This should be used only with indices that can fit into memory. +

+ Note that the resulting RAMDirectory instance is fully + independent from the original Directory (it is a + complete copy). Any subsequent changes to the + original Directory will not be visible in the + RAMDirectory instance. + +

+ a Directory value + + if an error occurs + +
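As a usage illustration against the Java Lucene 2.9 API this port tracks (the index path below is a placeholder), copying an existing on-disk index into a RAMDirectory looks roughly like this:

    import java.io.File;
    import java.io.IOException;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.RAMDirectory;

    public class LoadIndexIntoRam {
        public static void main(String[] args) throws IOException {
            Directory onDisk = FSDirectory.open(new File("/path/to/index")); // placeholder path
            Directory inMemory = new RAMDirectory(onDisk);  // full, independent in-memory copy
            onDisk.close();                                 // the copy no longer depends on it
            // ... open IndexReader/IndexSearcher instances over inMemory as usual ...
            inMemory.close();
        }
    }

As noted above, this only makes sense for indices that fit comfortably into memory.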
+ + Creates a new RAMDirectory instance from the {@link FSDirectory}. + + + a File specifying the index directory + + + + + Use {@link #RAMDirectory(Directory)} instead + + + + Creates a new RAMDirectory instance from the {@link FSDirectory}. + + + a String specifying the full index directory path + + + + + Use {@link #RAMDirectory(Directory)} instead + + + + Returns true iff the named file exists in this directory. + + + Returns the time the named file was last modified. + IOException if the file does not exist + + + Set the modified time of an existing file to now. + IOException if the file does not exist + + + Returns the length in bytes of a file in the directory. + IOException if the file does not exist + + + Return total size in bytes of all files in this + directory. This is currently quantized to + RAMOutputStream.BUFFER_SIZE. + + + + Removes an existing file in the directory. + IOException if the file does not exist + + + Renames an existing file in the directory. + FileNotFoundException if from does not exist + + + + + Creates a new, empty file in the directory with the given name. Returns a stream writing this file. + + + Returns a stream reading an existing file. + + + Closes the store to future operations, releasing associated memory. + + + Expert: Calculate query weights and build query scorers. +

+ The purpose of {@link Weight} is to ensure searching does not + modify a {@link Query}, so that a {@link Query} instance can be reused.
+ {@link Searcher} dependent state of the query should reside in the + {@link Weight}.
+ {@link IndexReader} dependent state should reside in the {@link Scorer}. +

+ A Weight is used in the following way: +

    +
  1. A Weight is constructed by a top-level query, given a + Searcher ({@link Query#CreateWeight(Searcher)}).
  2. The {@link #SumOfSquaredWeights()} method is called on the + Weight to compute the query normalization factor + {@link Similarity#QueryNorm(float)} of the query clauses contained in the + query.
  3. The query normalization factor is passed to {@link #Normalize(float)}. At + this point the weighting is complete.
  4. A Scorer is constructed by {@link #Scorer(IndexReader,boolean,boolean)}.
+ +
+ 2.9 + +
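A minimal sketch of the four-step sequence above, using hypothetical stand-in interfaces rather than the real Lucene types, just to make the call order concrete (the 1/sqrt factor mirrors the default Similarity's query norm):

    // Hypothetical stand-ins for Query/Weight/Scorer, only to illustrate the call order.
    interface SketchScorer { boolean next(); float score(); }

    interface SketchWeight {
        float sumOfSquaredWeights();          // step 2
        void normalize(float queryNorm);      // step 3
        SketchScorer scorer();                // step 4 (IndexReader argument omitted here)
    }

    interface SketchQuery { SketchWeight createWeight(); }   // step 1

    public class WeightLifecycleSketch {
        static SketchScorer buildScorer(SketchQuery query) {
            SketchWeight weight = query.createWeight();          // 1. construct the Weight
            float sum = weight.sumOfSquaredWeights();            // 2. sum of squared weights
            float queryNorm = (float) (1.0 / Math.sqrt(sum));    // default-Similarity-style query norm
            weight.normalize(queryNorm);                         // 3. weighting is now complete
            return weight.scorer();                              // 4. construct the Scorer
        }
    }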
+ + An explanation of the score computation for the named document. + + + sub-reader containing the give doc + + + + an Explanation for the score + + IOException + + + The query that this concerns. + + + The weight for this query. + + + Assigns the query normalization factor to this. + + + Returns a {@link Scorer} which scores documents in/out-of order according + to scoreDocsInOrder. +

+ NOTE: even if scoreDocsInOrder is false, it is + recommended to check whether the returned Scorer indeed scores + documents out of order (i.e., call {@link #ScoresDocsOutOfOrder()}), as + some Scorer implementations will always return documents + in-order.
+ NOTE: null can be returned if no documents will be scored by this + query. + +

+ + the {@link IndexReader} for which to return the {@link Scorer}. + + specifies whether in-order scoring of documents is required. Note + that if set to false (i.e., out-of-order scoring is required), + this method can return whatever scoring mode it supports, as every + in-order scorer is also an out-of-order one. However, an + out-of-order scorer may not support {@link Scorer#NextDoc()} + and/or {@link Scorer#Advance(int)}, therefore it is recommended to + request an in-order scorer if use of these methods is required. + + + if true, {@link Scorer#Score(Collector)} will be called; if false, + {@link Scorer#NextDoc()} and/or {@link Scorer#Advance(int)} will + be called. + + a {@link Scorer} which scores documents in/out-of order. + + IOException +
+ + The sum of squared weights of contained query clauses. + + + Returns true iff this implementation scores docs only out of order. This + method is used in conjunction with {@link Collector}'s + {@link Collector#AcceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and + {@link #Scorer(Lucene.Net.Index.IndexReader, boolean, boolean)} to + create a matching {@link Scorer} instance for a given {@link Collector}, or + vice versa. +

+ NOTE: the default implementation returns false, i.e. + the Scorer scores documents in-order. +

+
+ +

+ The TimeLimitedCollector is used to timeout search requests that take longer + than the maximum allowed search time limit. After this time is exceeded, the + search thread is stopped by throwing a TimeExceeded Exception. +

+ +

+ Use {@link TimeLimitingCollector} instead, which extends the new + {@link Collector}. This class will be removed in 3.0. + +
+ + Lower-level search API.
+ HitCollectors are primarily meant to be used to implement queries, sorting + and filtering. See {@link Collector} for a lower level and higher performance + (on a multi-segment index) API. + +
+ + + $Id: HitCollector.java 764551 2009-04-13 18:33:56Z mikemccand $ + + Please use {@link Collector} instead. + +
+ + Called once for every document matching a query, with the document + number and its raw score. + +

If, for example, an application wished to collect all of the hits for a + query in a BitSet, then it might:

+            Searcher searcher = new IndexSearcher(indexReader);
+            final BitSet bits = new BitSet(indexReader.maxDoc());
+            searcher.search(query, new HitCollector() {
+            public void collect(int doc, float score) {
+            bits.set(doc);
+            }
+            });
+            
+ +

Note: This is called in an inner search loop. For good search + performance, implementations of this method should not call + {@link Searcher#Doc(int)} or + {@link Lucene.Net.Index.IndexReader#Document(int)} on every + document number encountered. Doing so can slow searches by an order + of magnitude or more. +

Note: The score passed to this method is a raw score. + In other words, the score will not necessarily be a float whose value is + between 0 and 1. +

+
+ + Default timer resolution. + + + + + Default for {@link #IsGreedy()}. + + + + + Create a TimeLimitedCollector wrapper over another HitCollector with a specified timeout. + the wrapped HitCollector + + max time allowed for collecting hits after which {@link TimeExceededException} is thrown + + + + Calls collect() on the decorated HitCollector. + + + TimeExceededException if the time allowed has been exceeded. + + + Return the timer resolution. + + + + + Set the timer resolution. + The default timer resolution is 20 milliseconds. + This means that a search required to take no longer than + 800 milliseconds may be stopped after 780 to 820 milliseconds. +
Note that: +
    +
  • Finer (smaller) resolution is more accurate but less efficient.
  • Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds.
  • Setting resolution smaller than current resolution might take effect only after current + resolution. (Assume current resolution of 20 milliseconds is modified to 5 milliseconds, + then it can take up to 20 milliseconds for the change to have effect.)
+
+
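The resolution trade-off described above comes from the pseudo-clock design: one timer thread advances a shared counter every resolution interval, so a timeout test is just a volatile read. A self-contained sketch of the idea (not the library's TimerThread):

    public class PseudoClockSketch {
        private volatile long time;             // elapsed milliseconds, advanced in coarse ticks
        private final long resolution;          // e.g. 20 ms; timeouts are accurate to +/- one tick

        public PseudoClockSketch(long resolutionMillis) {
            this.resolution = resolutionMillis;
            Thread timer = new Thread(() -> {
                while (true) {
                    time += resolution;         // single writer; collectors only read the volatile
                    try { Thread.sleep(resolution); } catch (InterruptedException e) { return; }
                }
            });
            timer.setDaemon(true);
            timer.start();
        }

        public long millis() { return time; }

        // A collector's collect() would then do a cheap check such as:
        //   if (clock.millis() - start > timeAllowed) throw new RuntimeException("time exceeded");
    }

With a 20 millisecond resolution, an 800 millisecond limit is therefore enforced somewhere between roughly 780 and 820 milliseconds, as described above.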
+ + Checks if this time limited collector is greedy in collecting the last hit. + A non greedy collector, upon a timeout, would throw a {@link TimeExceededException} + without allowing the wrapped collector to collect current doc. A greedy one would + first allow the wrapped hit collector to collect current doc and only then + throw a {@link TimeExceededException}. + + + + + + Sets whether this time limited collector is greedy. + true to make this time limited greedy + + + + + + TimerThread provides a pseudo-clock service to all searching + threads, so that they can count elapsed time with less overhead + than repeatedly calling System.currentTimeMillis. A single + thread should be created to be used for all searches. + + + + Get the timer value in milliseconds. + + + Thrown when elapsed search time exceeds allowed search time. + + + Returns allowed time (milliseconds). + + + Returns elapsed time (milliseconds). + + + Returns last doc that was collected when the search time exceeded. + + + A Spans that is formed from the ordered subspans of a SpanNearQuery + where the subspans do not overlap and have a maximum slop between them. +

+ The formed spans only contain minimum slop matches.
+ The matching slop is computed from the distance(s) between + the non overlapping matching Spans.
+ Successive matches are always formed from the successive Spans + of the SpanNearQuery. +

+ The formed spans may contain overlaps when the slop is at least 1. + For example, when querying using +

t1 t2 t3
+ with slop at least 1, the fragment: +
t1 t2 t1 t3 t2 t3
+ matches twice: +
t1 t2 .. t3      
+
      t1 .. t2 t3
+ + + Expert: + Only public for subclassing. Most implementations should not need this class +
+
+ + Expert: an enumeration of span matches. Used to implement span searching. + Each span represents a range of term positions within a document. Matches + are enumerated in order, by increasing document number, within that by + increasing start position and finally by increasing end position. + + + + Move to the next match, returning true iff any such exists. + + + Skips to the first match beyond the current, whose document number is + greater than or equal to target.

Returns true iff there is such + a match.

Behaves as if written:

+            boolean skipTo(int target) {
+            do {
+            if (!next())
+            return false;
+            } while (target > doc());
+            return true;
+            }
+            
+ Most implementations are considerably more efficient than that. +
+
+ + Returns the document number of the current match. Initially invalid. + + + Returns the start position of the current match. Initially invalid. + + + Returns the end position of the current match. Initially invalid. + + + Returns the payload data for the current span. + This is invalid until {@link #Next()} is called for + the first time. + This method must not be called more than once after each call + of {@link #Next()}. However, most payloads are loaded lazily, + so if the payload data for the current position is not needed, + this method may not be called at all for performance reasons. An ordered + SpanQuery does not lazy load, so if you have payloads in your index and + you do not want ordered SpanNearQuerys to collect payloads, you can + disable collection with a constructor option.
+ + Note that the return type is a collection, thus the ordering should not be relied upon. +
+

+ WARNING: The status of the Payloads feature is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case.

+ +

+ a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false + + java.io.IOException +
+ + Checks if a payload can be loaded at this position. +

+ Payloads can only be loaded once per call to + {@link #Next()}. + +

+ true if there is a payload available at this position that can be loaded + +
+ + The spans in the same order as the SpanNearQuery + + + Indicates that all subSpans have same doc() + + + Advances the subSpans to just after an ordered match with a minimum slop + that is smaller than the slop allowed by the SpanNearQuery. + + true iff there is such a match. + + + + Advance the subSpans to the same document + + + Check whether two Spans in the same document are ordered. + + + + + true iff spans1 starts before spans2 + or the spans start at the same position, + and spans1 ends before spans2. + + + + Like {@link #DocSpansOrdered(Spans,Spans)}, but use the spans + starts and ends as parameters. + + + + Order the subSpans within the same document by advancing all later spans + after the previous one. + + + + The subSpans are ordered in the same doc, so there is a possible match. + Compute the slop while making the match as short as possible by advancing + all subSpans except the last one in reverse order. + + + + Abstract base class providing a mechanism to restrict searches to a subset + of an index and also maintains and returns position information. + This is useful if you want to compare the positions from a SpanQuery with the positions of items in + a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, + and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could + then compare position information for post processing. + + + + Abstract base class for restricting which documents may be returned during searching. +

+ Note: In Lucene 3.0 {@link #Bits(IndexReader)} will be removed + and {@link #GetDocIdSet(IndexReader)} will be defined as abstract. + All implementing classes must therefore implement {@link #GetDocIdSet(IndexReader)} + in order to work with Lucene 3.0. +

+
+ + + + Creates a {@link DocIdSet} enumerating the documents that should be + permitted in search results. NOTE: null can be + returned if no documents are accepted by this Filter. +

+ Note: This method will be called once per segment in + the index during searching. The returned {@link DocIdSet} + must refer to document IDs for that segment, not for + the top-level reader. + + @param reader a {@link IndexReader} instance opened on the index currently + searched on. Note, it is likely that the provided reader does not + represent the whole underlying index i.e. if the index has more than + one segment the given reader only represents a single segment. + +

+ a DocIdSet that provides the documents which should be permitted or + prohibited in search results. NOTE: null can be returned if + no documents will be accepted by this Filter. + + + +
+ + Returns a SpanFilterResult with true for documents which should be permitted in + search results, and false for those that should not and Spans for where the true docs match. + + The {@link Lucene.Net.Index.IndexReader} to load position and DocIdSet information from + + A {@link SpanFilterResult} + + java.io.IOException if there was an issue accessing the necessary information + + + + + Expert: Compares two ScoreDoc objects for sorting. + +

Created: Feb 3, 2004 9:00:16 AM + +

+ lucene 1.4 + + $Id: ScoreDocComparator.java 738219 2009-01-27 20:15:21Z mikemccand $ + + use {@link FieldComparator} + +
+ + Special comparator for sorting hits according to computed relevance (document score). + + + Special comparator for sorting hits according to index order (document number). + + + Compares two ScoreDoc objects and returns a result indicating their + sort order. + + First ScoreDoc + + Second ScoreDoc + + a negative integer if i should come before j
+ a positive integer if i should come after j
+ 0 if they are equal +
+ + +
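The compare contract above follows the usual comparator convention (negative, zero, positive). Purely as an illustration outside the library, relevance order with index-order tie-breaking can be written as:

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.List;

    public class ScoreDocOrderSketch {
        // Plain (doc, score) pair standing in for a ScoreDoc.
        static final class Hit {
            final int doc; final float score;
            Hit(int doc, float score) { this.doc = doc; this.score = score; }
        }

        // Higher score sorts first; ties fall back to ascending document number.
        static final Comparator<Hit> RELEVANCE =
            Comparator.<Hit>comparingDouble(h -> -h.score).thenComparingInt(h -> h.doc);

        public static void main(String[] args) {
            List<Hit> hits = new ArrayList<>(List.of(
                new Hit(3, 0.5f), new Hit(1, 0.9f), new Hit(2, 0.9f)));
            hits.sort(RELEVANCE);
            hits.forEach(h -> System.out.println(h.doc + " " + h.score)); // doc 1, doc 2, then doc 3
        }
    }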
+ + Returns the value used to sort the given document. The + object returned must implement the java.io.Serializable + interface. This is used by multisearchers to determine how + to collate results from their searchers. + + + + Document + + Serializable object + + + + Returns the type of sort. Should return SortField.SCORE, + SortField.DOC, SortField.STRING, + SortField.INTEGER, SortField.FLOAT or + SortField.CUSTOM. It is not valid to return + SortField.AUTO. + This is used by multisearchers to determine how to collate results + from their searchers. + + One of the constants in SortField. + + + + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified prefix filter term. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. + +

+
+ + Abstract class for enumerating a subset of all terms. +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + Abstract class for enumerating terms. +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + Increments the enumeration to the next element. True if one exists. + + + Returns the current Term in the enumeration. + + + Returns the docFreq of the current Term in the enumeration. + + + Closes the enumeration to further activity, freeing resources. + + + Skips terms to the first beyond the current whose value is + greater or equal to target.

Returns true iff there is such + an entry.

Behaves as if written:

+            public boolean skipTo(Term target) {
+            do {
+            if (!next())
+            return false;
+            } while (target > term());
+            return true;
+            }
+            
+ Some implementations *could* be considerably more efficient than a linear scan. + Check the implementation to be sure. +
+ This method is not performant and will be removed in Lucene 3.0. + Use {@link IndexReader#Terms(Term)} to create a new TermEnum positioned at a + given term. + +
+ + the current term + + + the delegate enum - to set this member use {@link #setEnum} + + + Equality compare on the term + + + Equality measure on the term + + + Indicates the end of the enumeration has been reached + + + use this method to set the actual TermEnum (e.g. in ctor), + it will be automatically positioned on the first matching term. + + + + Returns the docFreq of the current Term in the enumeration. + Returns -1 if no Term matches or all terms have been enumerated. + + + + Increments the enumeration to the next element. True if one exists. + + + Returns the current Term in the enumeration. + Returns null if no Term matches or all terms have been enumerated. + + + + Closes the enumeration to further activity, freeing resources. + + + Position of a term in a document that takes into account the term offset within the phrase. + + + Go to next location of this term current document, and set + position as location - offset, so that a + matching exact phrase is easily identified when all PhrasePositions + have exactly the same position. + + + + Expert: obtains the ordinal of the field value from the default Lucene + {@link Lucene.Net.Search.FieldCache Fieldcache} using getStringIndex(). +

+ The native lucene index order is used to assign an ordinal value for each field value. +

+ Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. +

+ Example: +
If there were only three field values: "apple","banana","pear" +
then ord("apple")=1, ord("banana")=2, ord("pear")=3 +

+ WARNING: + ord() depends on the position in an index and can thus change + when other documents are inserted or deleted, + or if a MultiSearcher is used. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if {@link #getValues} is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API. Alternatively, for + a short-term fix, you could wrap your ValueSource using + {@link MultiValueSource}, which costs more CPU per lookup + but will not consume double the FieldCache RAM.

+

+
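The apple/banana/pear numbering above is just lexicographic rank over the distinct terms, counted from 1; a trivial stand-alone illustration:

    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeSet;

    public class OrdinalSketch {
        public static void main(String[] args) {
            // TreeSet sorts the distinct terms; ordinals are then assigned starting at 1.
            TreeSet<String> terms = new TreeSet<>(List.of("pear", "apple", "banana"));
            Map<String, Integer> ord = new LinkedHashMap<>();
            int next = 1;
            for (String t : terms) ord.put(t, next++);
            System.out.println(ord);  // {apple=1, banana=2, pear=3}
        }
    }

Adding or removing a term shifts every ordinal that sorts after it, which is the point of the warning above.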
+ + Expert: source of values for basic function queries. +

At its default/simplest form, values - one per doc - are used as the score of that doc. +

Values are instantiated as + {@link Lucene.Net.Search.Function.DocValues DocValues} for a particular reader. +

ValueSource implementations differ in RAM requirements: it would always be a factor + of the number of documents, but for each document the number of bytes can be 1, 2, 4, or 8. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + + +

+
+ + Return the DocValues used by the function query. + the IndexReader used to read these values. + If any caching is involved, that caching would also be IndexReader based. + + IOException for any error. + + + description of field, used in explain() + + + Needed for possible caching of query results - used by {@link ValueSourceQuery#equals(Object)}. + + + + + Needed for possible caching of query results - used by {@link ValueSourceQuery#hashCode()}. + + + + + Constructor for a certain field. + field whose values order is used. + + + + Expert: represents field values as different types. + Normally created via a + {@link Lucene.Net.Search.Function.ValueSource ValueSuorce} + for a particular field and reader. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + + +

+
+ + Return doc value as a float. +

Mandatory: every DocValues implementation must implement at least this method. +

+ document whose float value is requested. + +
+ + Return doc value as an int. +

Optional: DocValues implementation can (but don't have to) override this method. +

+ document whose int value is requested. + +
+ + Return doc value as a long. +

Optional: DocValues implementation can (but don't have to) override this method. +

+ document whose long value is requested. + +
+ + Return doc value as a double. +

Optional: DocValues implementation can (but don't have to) override this method. +

+ document whose double value is requested. + +
+ + Return doc value as a string. +

Optional: DocValues implementation can (but don't have to) override this method. +

+ document whose string value is requested. + +
+ + Return a string representation of a doc value, as required for Explanations. + + Explain the scoring value for the input doc. + + Expert: for test purposes only, return the inner array of values, or null if not applicable. +

+ Allows tests to verify that loaded values are: +

    +
  1. indeed cached/reused.
  2. stored in the expected size/type (byte/short/int/float).
+ Note: implementations of DocValues must override this method for + these test elements to be tested, Otherwise the test would not fail, just + print a warning. +
+
+ + Returns the minimum of all values or Float.NaN if this + DocValues instance does not contain any value. +

+ This operation is optional +

+ +

+ the minimum of all values or Float.NaN if this + DocValues instance does not contain any value. + +
+ + Returns the maximum of all values or Float.NaN if this + DocValues instance does not contain any value. +

+ This operation is optional +

+ +

+ the maximum of all values or Float.NaN if this + DocValues instance does not contain any value. + +
+ + Returns the average of all values or Float.NaN if this + DocValues instance does not contain any value. * +

+ This operation is optional +

+ +

+ the average of all values or Float.NaN if this + DocValues instance does not contain any value + +
+ + Expert: A hit queue for sorting by hits by terms in more than one field. + Uses FieldCache.DEFAULT for maintaining + internal term lookup tables. + + This class will not resolve SortField.AUTO types, and expects the type + of all SortFields used for construction to already have been resolved. + {@link SortField#DetectFieldType(IndexReader, String)} is a utility method which + may be used for field type detection. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + 2.9 + + $Id: + + + + + + + + A PriorityQueue maintains a partial ordering of its elements such that the + least element can always be found in constant time. Put()'s and pop()'s + require log(size) time. + +

NOTE: This class pre-allocates a full array of + length maxSize+1, in {@link #initialize}. + +

+
+ + Determines the ordering of objects in this priority queue. Subclasses + must define this one method. + + + + This method can be overridden by extending classes to return a sentinel + object which will be used by {@link #Initialize(int)} to fill the queue, so + that the code which uses that queue can always assume it's full and only + change the top without attempting to insert any new object.
+ + Those sentinel values should always compare worse than any non-sentinel + value (i.e., {@link #LessThan(Object, Object)} should always favor the + non-sentinel values).
+ + By default, this method returns false, which means the queue will not be + filled with sentinel values. Otherwise, the value returned will be used to + pre-populate the queue. Adds sentinel values to the queue.
+ + If this method is extended to return a non-null value, then the following + usage pattern is recommended: + +
+            // extends getSentinelObject() to return a non-null value.
+            PriorityQueue pq = new MyQueue(numHits);
+            // save the 'top' element, which is guaranteed to not be null.
+            MyObject pqTop = (MyObject) pq.top();
+            <...>
+            // now in order to add a new element, which is 'better' than top (after 
+            // you've verified it is better), it is as simple as:
+            pqTop.change().
+            pqTop = pq.updateTop();
+            
+ + NOTE: if this method returns a non-null value, it will be called by + {@link #Initialize(int)} {@link #Size()} times, relying on a new object to + be returned and will not check if it's null again. Therefore you should + ensure any call to this method creates a new instance and behaves + consistently, e.g., it cannot return null if it previously returned + non-null. + +
+ the sentinel object to use to pre-populate the queue, or null if + sentinel objects are not supported. + +
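+ To make the recommended pattern above concrete, here is a minimal sketch against the Lucene 2.9 Java org.apache.lucene.util.PriorityQueue API; the ScoreHolder and ScoreQueue types are hypothetical, introduced only for illustration:
+            import org.apache.lucene.util.PriorityQueue;
+
+            class ScoreHolder {
+              float score;
+              ScoreHolder(float score) { this.score = score; }
+            }
+
+            class ScoreQueue extends PriorityQueue {
+              ScoreQueue(int maxSize) {
+                initialize(maxSize);   // pre-fills with sentinels because getSentinelObject() is non-null
+              }
+              protected boolean lessThan(Object a, Object b) {
+                return ((ScoreHolder) a).score < ((ScoreHolder) b).score;
+              }
+              protected Object getSentinelObject() {
+                return new ScoreHolder(Float.NEGATIVE_INFINITY);   // compares worse than any real value
+              }
+            }
+
+            class ScoreQueueDemo {
+              static void collect(float[] scores) {
+                ScoreQueue pq = new ScoreQueue(10);
+                ScoreHolder top = (ScoreHolder) pq.top();   // never null: the queue is full of sentinels
+                for (int i = 0; i < scores.length; i++) {
+                  if (scores[i] > top.score) {              // only touch the queue when the candidate beats top
+                    top.score = scores[i];
+                    top = (ScoreHolder) pq.updateTop();
+                  }
+                }
+              }
+            }
+ Because the queue starts full, the hot loop never calls add() or insertWithOverflow(); it only mutates top and calls updateTop(), exactly as the pattern above recommends.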
+ + Subclass constructors must call this. + + + Adds an Object to a PriorityQueue in log(size) time. If one tries to add + more objects than maxSize from initialize a RuntimeException + (ArrayIndexOutOfBound) is thrown. + + + use {@link #Add(Object)} which returns the new top object, + saving an additional call to {@link #Top()}. + + + + Adds an Object to a PriorityQueue in log(size) time. If one tries to add + more objects than maxSize from initialize an + {@link ArrayIndexOutOfBoundsException} is thrown. + + + the new 'top' element in the queue. + + + + Adds element to the PriorityQueue in log(size) time if either the + PriorityQueue is not full, or not lessThan(element, top()). + + + + + true if element is added, false otherwise. + + use {@link #InsertWithOverflow(Object)} instead, which + encourages objects reuse. + + + + insertWithOverflow() is the same as insert() except its + return value: it returns the object (if any) that was + dropped off the heap because it was full. This can be + the given parameter (in case it is smaller than the + full heap's minimum, and couldn't be added), or another + object that was previously the smallest value in the + heap and now has been replaced by a larger one, or null + if the queue wasn't yet full with maxSize elements. + + + + Returns the least element of the PriorityQueue in constant time. + + + Removes and returns the least element of the PriorityQueue in log(size) + time. + + + + Should be called when the Object at top changes values. Still log(n) worst + case, but it's at least twice as fast to + +
+            pq.top().change();
+            pq.adjustTop();
+            
+ + instead of + +
+            o = pq.pop();
+            o.change();
+            pq.push(o);
+            
+ +
+ use {@link #UpdateTop()} which returns the new top element and + saves an additional call to {@link #Top()}. + +
+ + Should be called when the Object at top changes values. Still log(n) worst + case, but it's at least twice as fast to + +
+            pq.top().change();
+            pq.updateTop();
+            
+ + instead of + +
+            o = pq.pop();
+            o.change();
+            pq.push(o);
+            
+ +
+ the new 'top' element. + +
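+ The insertWithOverflow() return-value contract described above can be handled along these lines (a sketch against the same 2.9 PriorityQueue API; the method and variable names are illustrative only):
+            import org.apache.lucene.util.PriorityQueue;
+
+            class OverflowHandling {
+              static void offer(PriorityQueue pq, Object candidate) {
+                Object overflow = pq.insertWithOverflow(candidate);
+                if (overflow == candidate) {
+                  // queue was full and candidate was no better than the current minimum: not added
+                } else if (overflow != null) {
+                  // candidate was added; 'overflow' is the displaced former minimum and may be reused
+                } else {
+                  // queue was not yet full: candidate was simply added
+                }
+              }
+            }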
+ + Returns the number of elements currently stored in the PriorityQueue. + + + Removes all entries from the PriorityQueue. + + + Creates a hit queue sorted by the given list of fields. + +

NOTE: The instances returned by this method + pre-allocate a full array of length numHits. + +

+ SortField array we are sorting by in priority order (highest + priority first); cannot be null or empty + + The number of hits to retain. Must be greater than zero. + + IOException +
+ + Stores the sort criteria being used. + + + Given a queue Entry, creates a corresponding FieldDoc + that contains the values used to sort the given document. + These values are not the raw values out of the index, but the internal + representation of them. This is so the given search hit can be collated by + a MultiSearcher with other search hits. + + + The Entry used to create a FieldDoc + + The newly created FieldDoc + + + + + + Returns the SortFields being used by this hit queue. + + + An implementation of {@link FieldValueHitQueue} which is optimized in case + there is just one comparator. + + + + Returns whether a is less relevant than b. + ScoreDoc + + ScoreDoc + + true if document a should be sorted after document b. + + + + An implementation of {@link FieldValueHitQueue} which is optimized in case + there is more than one comparator. + + + + Provides a {@link FieldComparator} for custom field sorting. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + + + Creates a comparator for the field in the given index. + + + Name of the field to create comparator for. + + FieldComparator. + + IOException + If an error occurs reading the index. + + + + A {@link Filter} that only accepts documents whose single + term value in the specified field is contained in the + provided set of allowed terms. + +

+ + This is the same functionality as TermsFilter (from + contrib/queries), except this filter requires that the + field contains only a single term for all documents. + Because of drastically different implementations, they + also have different performance characteristics, as + described below. + +

+ + The first invocation of this filter on a given field will + be slower, since a {@link FieldCache.StringIndex} must be + created. Subsequent invocations using the same field + will re-use this cache. However, as with all + functionality based on {@link FieldCache}, persistent RAM + is consumed to hold the cache, and is not freed until the + {@link IndexReader} is closed. In contrast, TermsFilter + has no persistent RAM consumption. + + +

+ + With each search, this filter translates the specified + set of Terms into a private {@link OpenBitSet} keyed by + term number per unique {@link IndexReader} (normally one + reader per segment). Then, during matching, the term + number for each docID is retrieved from the cache and + then checked for inclusion using the {@link OpenBitSet}. + Since all testing is done using RAM resident data + structures, performance should be very fast, most likely + fast enough to not require further caching of the + DocIdSet for each possible combination of terms. + However, because docIDs are simply scanned linearly, an + index with a great many small documents may find this + linear scan too costly. + +

+ + In contrast, TermsFilter builds up an {@link OpenBitSet}, + keyed by docID, every time it's created, by enumerating + through all matching docs using {@link TermDocs} to seek + and scan through each term's docID list. While there is + no linear scan of all docIDs, besides the allocation of + the underlying array in the {@link OpenBitSet}, this + approach requires a number of "disk seeks" in proportion + to the number of terms, which can be exceptionally costly + when there are cache misses in the OS's IO cache. + +

+ + Generally, this filter will be slower on the first + invocation for a given field, but subsequent invocations, + even if you change the allowed set of Terms, should be + faster than TermsFilter, especially as the number of + Terms being matched increases. If you are matching only + a very small number of terms, and those terms in turn + match a very small number of documents, TermsFilter may + perform faster. + +

+ + Which filter is best is very application dependent. +

+
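+ A short usage sketch of the filter described above, assuming the Lucene 2.9 Java API; the field name and allowed terms are hypothetical:
+            import java.io.IOException;
+            import org.apache.lucene.search.FieldCacheTermsFilter;
+            import org.apache.lucene.search.IndexSearcher;
+            import org.apache.lucene.search.MatchAllDocsQuery;
+            import org.apache.lucene.search.TopDocs;
+
+            class CategoryFilterDemo {
+              static TopDocs newsAndBlogs(IndexSearcher searcher) throws IOException {
+                // Only documents whose single "category" term is one of the allowed values pass.
+                FieldCacheTermsFilter filter =
+                    new FieldCacheTermsFilter("category", new String[] { "news", "blog" });
+                return searcher.search(new MatchAllDocsQuery(), filter, 10);
+              }
+            }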
+ + A DocIdSet contains a set of doc ids. Implementing classes must + only implement {@link #iterator} to provide access to the set. + + + + An empty {@code DocIdSet} instance for easy use, e.g. in Filters that hit no documents. + + + Provides a {@link DocIdSetIterator} to access the set. + This implementation can return null or + {@linkplain #EMPTY_DOCIDSET}.iterator() if there + are no docs that match. + + + + This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet + should be cached without copying it into a BitSet. The default is to return + false. If you have an own DocIdSet implementation + that does its iteration very effective and fast without doing disk I/O, + override this method and return true. + + + + This abstract class defines methods to iterate over a set of non-decreasing + doc ids. Note that this class assumes it iterates on doc Ids, and therefore + {@link #NO_MORE_DOCS} is set to {@value #NO_MORE_DOCS} in order to be used as + a sentinel object. Implementations of this class are expected to consider + {@link Integer#MAX_VALUE} as an invalid value. + + + + When returned by {@link #NextDoc()}, {@link #Advance(int)} and + {@link #Doc()} it means there are no more docs in the iterator. + + + + Unsupported anymore. Call {@link #DocID()} instead. This method throws + {@link UnsupportedOperationException} if called. + + + use {@link #DocID()} instead. + + + + Returns the following: +
  • -1 or {@link #NO_MORE_DOCS} if {@link #NextDoc()} or {@link #Advance(int)} were not called yet.
  • {@link #NO_MORE_DOCS} if the iterator has exhausted.
  • Otherwise it should return the doc ID it is currently on.

+ NOTE: in 3.0, this method will become abstract. + +

+ 2.9 + +
+ + Unsupported anymore. Call {@link #NextDoc()} instead. This method throws + {@link UnsupportedOperationException} if called. + + + use {@link #NextDoc()} instead. This will be removed in 3.0 + + + + Unsupported anymore. Call {@link #Advance(int)} instead. This method throws + {@link UnsupportedOperationException} if called. + + + use {@link #Advance(int)} instead. This will be removed in 3.0 + + + + Advances to the next document in the set and returns the doc it is + currently on, or {@link #NO_MORE_DOCS} if there are no more docs in the + set.
+ + NOTE: in 3.0 this method will become abstract, following the removal + of {@link #Next()}. For backward compatibility it is implemented as: + +
+            public int nextDoc() throws IOException {
+              return next() ? doc() : NO_MORE_DOCS;
+            }
+            
+ + NOTE: after the iterator has exhausted you should not call this + method, as it may result in unpredicted behavior. + +
+ 2.9 + +
+ + Advances to the first beyond the current whose document number is greater + than or equal to target. Returns the current document number or + {@link #NO_MORE_DOCS} if there are no more docs in the set. +

+ Behaves as if written: + +

+            int advance(int target) {
+              int doc;
+              while ((doc = nextDoc()) < target) {
+              }
+              return doc;
+            }
+            
+ + Some implementations are considerably more efficient than that. +

+ NOTE: certain implementations may return a different value (each + time) if called several times in a row with the same target. +

+ NOTE: this method may be called with {@value #NO_MORE_DOCS} for + efficiency by some Scorers. If your implementation cannot efficiently + determine that it should exhaust, it is recommended that you check for that + value in each call to this method. +

+ NOTE: after the iterator has exhausted you should not call this + method, as it may result in unpredicted behavior. +

+ NOTE: in 3.0 this method will become abstract, following the removal + of {@link #SkipTo(int)}. + +

+ 2.9 + +
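+ Putting the nextDoc()/NO_MORE_DOCS contract above together, a typical consumer looks roughly like this (a sketch assuming the Lucene 2.9 Java API):
+            import java.io.IOException;
+            import org.apache.lucene.search.DocIdSet;
+            import org.apache.lucene.search.DocIdSetIterator;
+
+            class DocIdSetDump {
+              static void dump(DocIdSet set) throws IOException {
+                DocIdSetIterator it = set.iterator();
+                if (it == null) {
+                  return;   // iterator() is allowed to return null when nothing matches
+                }
+                int doc;
+                while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+                  System.out.println("match: doc=" + doc);   // doc ids arrive in non-decreasing order
+                }
+              }
+            }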
+ + This DocIdSet implementation is cacheable. + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + A range query that returns a constant score equal to its boost for + all documents in the exclusive range of terms. + +

It does not have an upper bound on the number of clauses covered in the range. + +

This query matches the documents looking for terms that fall into the + supplied range according to {@link String#compareTo(String)}. It is not intended + for numerical ranges, use {@link NumericRangeQuery} instead. + +

This query is hardwired to {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}. + If you want to change this, use {@link TermRangeQuery} instead. + +

+ Use {@link TermRangeQuery} for term ranges or + {@link NumericRangeQuery} for numeric ranges instead. + This class will be removed in Lucene 3.0. + + $Id: ConstantScoreRangeQuery.java 797694 2009-07-25 00:03:33Z mikemccand $ + +
+ + A Query that matches documents within an exclusive range of terms. + +

This query matches the documents looking for terms that fall into the + supplied range according to {@link String#compareTo(String)}. It is not intended + for numerical ranges, use {@link NumericRangeQuery} instead. + +

This query uses the {@link + MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + rewrite method. +

+ 2.9 + +
+ + An abstract {@link Query} that matches documents + containing a subset of terms provided by a {@link + FilteredTermEnum} enumeration. + +

This query cannot be used directly; you must subclass + it and define {@link #getEnum} to provide a {@link + FilteredTermEnum} that iterates through the terms to be + matched. + +

NOTE: if {@link #setRewriteMethod} is either + {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link + #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a + {@link BooleanQuery.TooManyClauses} exception during + searching, which happens when the number of terms to be + searched exceeds {@link + BooleanQuery#GetMaxClauseCount()}. Setting {@link + #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE} + prevents this. + +

The recommended rewrite method is {@link + #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU + computing unhelpful scores, and it tries to pick the most + performant rewrite method given the query. + + Note that {@link QueryParser} produces + MultiTermQueries using {@link + #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default. +

+
+ + The abstract base class for queries. +

Instantiable subclasses are: +

  • {@link TermQuery}
  • {@link MultiTermQuery}
  • {@link BooleanQuery}
  • {@link WildcardQuery}
  • {@link PhraseQuery}
  • {@link PrefixQuery}
  • {@link MultiPhraseQuery}
  • {@link FuzzyQuery}
  • {@link TermRangeQuery}
  • {@link NumericRangeQuery}
  • {@link Lucene.Net.Search.Spans.SpanQuery}

A parser for queries is contained in: +

  • {@link Lucene.Net.QueryParsers.QueryParser QueryParser}
+
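+ For example, two of the subclasses listed above can be combined programmatically, or an equivalent query can be produced by the parser (a sketch assuming the Lucene 2.9 Java API; the field name and query text are hypothetical):
+            import org.apache.lucene.analysis.standard.StandardAnalyzer;
+            import org.apache.lucene.index.Term;
+            import org.apache.lucene.queryParser.QueryParser;
+            import org.apache.lucene.search.BooleanClause;
+            import org.apache.lucene.search.BooleanQuery;
+            import org.apache.lucene.search.Query;
+            import org.apache.lucene.search.TermQuery;
+            import org.apache.lucene.util.Version;
+
+            class QueryConstruction {
+              // Build the query tree directly.
+              static Query programmatic() {
+                BooleanQuery bq = new BooleanQuery();
+                bq.add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.MUST);
+                bq.add(new TermQuery(new Term("body", "net")), BooleanClause.Occur.SHOULD);
+                return bq;
+              }
+
+              // Or let the QueryParser mentioned above produce a similar tree from text.
+              static Query parsed() throws Exception {
+                QueryParser parser = new QueryParser(Version.LUCENE_29, "body",
+                                                     new StandardAnalyzer(Version.LUCENE_29));
+                return parser.parse("+lucene net");
+              }
+            }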
+ + Sets the boost for this query clause to b. Documents + matching this clause will (in addition to the normal weightings) have + their score multiplied by b. + + + + Gets the boost for this clause. Documents matching + this clause will (in addition to the normal weightings) have their score + multiplied by b. The boost is 1.0 by default. + + + + Prints a query to a string, with field assumed to be the + default field and omitted. +

The representation used is one that is supposed to be readable + by {@link Lucene.Net.QueryParsers.QueryParser QueryParser}. However, + there are the following limitations: +

  • If the query was created by the parser, the printed representation may not be exactly what was parsed. For example, characters that need to be escaped will be represented without the required backslash.
  • Some of the more complicated queries (e.g. span queries) don't have a representation that can be parsed by QueryParser.
+
+ + Prints a query to a string. + + + Expert: Constructs an appropriate Weight implementation for this query. + +

+ Only implemented by primitive queries, which re-write to themselves. +

+
+ + Expert: Constructs and initializes a Weight for a top-level query. + + + Expert: called to re-write queries into primitive queries. For example, + a PrefixQuery will be rewritten into a BooleanQuery that consists + of TermQuerys. + + + + Expert: called when re-writing queries under MultiSearcher. + + Create a single query suitable for use by all subsearchers (in 1-1 + correspondence with queries). This is an optimization of the OR of + all queries. We handle the common optimization cases of equal + queries and overlapping clauses of boolean OR queries (as generated + by MultiTermQuery.rewrite()). + Be careful overriding this method as queries[0] determines which + method will be called and is not necessarily of the same type as + the other queries. + + + + Expert: adds all terms occuring in this query to the terms set. Only + works if this query is in its {@link #rewrite rewritten} form. + + + UnsupportedOperationException if this query is not yet rewritten + + + Expert: merges the clauses of a set of BooleanQuery's into a single + BooleanQuery. + +

A utility for use by {@link #Combine(Query[])} implementations. +

+
+ + Expert: Returns the Similarity implementation to be used for this query. + Subclasses may override this method to specify their own Similarity + implementation, perhaps one that delegates through that of the Searcher. + By default the Searcher's Similarity implementation is returned. + + + + Returns a clone of this query. + + + A rewrite method that first creates a private Filter, + by visiting each term in sequence and marking all docs + for that term. Matching documents are assigned a + constant score equal to the query's boost. + +

This method is faster than the BooleanQuery + rewrite methods when the number of matched terms or + matched documents is non-trivial. Also, it will never + hit an errant {@link BooleanQuery.TooManyClauses} + exception. + +

+ + +
+ + A rewrite method that first translates each term into + {@link BooleanClause.Occur#SHOULD} clause in a + BooleanQuery, and keeps the scores as computed by the + query. Note that typically such scores are + meaningless to the user, and require non-trivial CPU + to compute, so it's almost always better to use {@link + #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. + +

NOTE: This rewrite method will hit {@link + BooleanQuery.TooManyClauses} if the number of terms + exceeds {@link BooleanQuery#getMaxClauseCount}. + +

+ + +
+ + Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except + scores are not computed. Instead, each matching + document receives a constant score equal to the + query's boost. + +

NOTE: This rewrite method will hit {@link + BooleanQuery.TooManyClauses} if the number of terms + exceeds {@link BooleanQuery#getMaxClauseCount}. + +

+ + +
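+ Selecting one of the rewrite methods above is a single call on any MultiTermQuery subclass; a sketch assuming the Lucene 2.9 Java API, with a hypothetical field and prefix:
+            import org.apache.lucene.index.Term;
+            import org.apache.lucene.search.MultiTermQuery;
+            import org.apache.lucene.search.PrefixQuery;
+
+            class RewriteSelection {
+              static PrefixQuery constantScoreFilterRewrite() {
+                PrefixQuery q = new PrefixQuery(new Term("body", "luc"));
+                // Avoids BooleanQuery.TooManyClauses no matter how many terms the prefix expands to.
+                q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+                return q;
+              }
+            }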
+ + Read-only default instance of {@link + ConstantScoreAutoRewrite}, with {@link + ConstantScoreAutoRewrite#setTermCountCutoff} set to + {@link + ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} + and {@link + ConstantScoreAutoRewrite#setDocCountPercent} set to + {@link + ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}. + Note that you cannot alter the configuration of this + instance; you'll need to create a private instance + instead. + + + + Constructs a query for terms matching term. + check sub class for possible term access - the Term does not + make sense for all MultiTermQuerys and will be removed. + + + + Constructs a query matching terms that cannot be represented with a single + Term. + + + + Returns the pattern term. + check sub class for possible term access - getTerm does not + make sense for all MultiTermQuerys and will be removed. + + + + Construct the enumeration to be used, expanding the pattern term. + + + Expert: Return the number of unique terms visited during execution of the query. + If there are many of them, you may consider using another query type + or optimize your total term count in index. +

This method is not thread safe, be sure to only call it when no query is running! + If you re-use the same query instance for another + search, be sure to first reset the term counter + with {@link #clearTotalNumberOfTerms}. +

+ On optimized indexes / no MultiReaders, you get the correct number of + unique terms for the whole index. Use this number to compare different queries. + For non-optimized indexes this number can also be achieved in + non-constant-score mode. In constant-score mode you get the total number of + terms seeked for all segments / sub-readers. +

+ + +
+ + Expert: Resets the counting of unique terms. + Do this before executing the query/filter. + + + + + + + + + + Sets the rewrite method to be used when executing the + query. You can use one of the four core methods, or + implement your own subclass of {@link RewriteMethod}. + + + + A rewrite method that tries to pick the best + constant-score rewrite method based on term and + document counts from the query. If both the number of + terms and documents is small enough, then {@link + #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. + Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is + used. + + + + Abstract class that defines how the query is rewritten. + + + If the number of terms in this query is equal to or + larger than this setting then {@link + #CONSTANT_SCORE_FILTER_REWRITE} is used. + + + + + + + + If the number of documents to be visited in the + postings exceeds this specified percentage of the + maxDoc() for the index, then {@link + #CONSTANT_SCORE_FILTER_REWRITE} is used. + + 0.0 to 100.0 + + + + + + + + Constructs a query selecting all terms greater/equal than lowerTerm + but less/equal than upperTerm. + +

+ If an endpoint is null, it is said + to be "open". Either or both endpoints may be open. Open endpoints may not + be exclusive (you can't select all but the first or last term without + explicitly specifying the term to exclude.) + +

+ The field that holds both lower and upper terms. + + The term text at the lower end of the range + + The term text at the upper end of the range + + If true, the lowerTerm is + included in the range. + + If true, the upperTerm is + included in the range. + +
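+ A sketch of the constructor described above (Lucene 2.9 Java signature assumed; the field and endpoints are hypothetical):
+            import org.apache.lucene.search.TermRangeQuery;
+
+            class RangeConstruction {
+              // All "title" terms from "apple" (inclusive) up to "banana" (exclusive);
+              // passing null for an endpoint would leave that side of the range open.
+              static TermRangeQuery titleRange() {
+                return new TermRangeQuery("title", "apple", "banana", true, false);
+              }
+            }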
+ + Constructs a query selecting all terms greater/equal than + lowerTerm but less/equal than upperTerm. +

+ If an endpoint is null, it is said + to be "open". Either or both endpoints may be open. Open endpoints may not + be exclusive (you can't select all but the first or last term without + explicitly specifying the term to exclude.) +

+ If collator is not null, it will be used to decide whether + index terms are within the given range, rather than using the Unicode code + point order in which index terms are stored. +

+ WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + +

+ The Term text at the lower end of the range + + The Term text at the upper end of the range + + If true, the lowerTerm is + included in the range. + + If true, the upperTerm is + included in the range. + + The collator to use to collate index Terms, to determine + their membership in the range bounded by lowerTerm and + upperTerm. + +
+ + Returns the field name for this query + + + Returns the lower value of this range query + + + Returns the upper value of this range query + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the collator used to determine range inclusion, if any. + + + Prints a user-readable version of this query. + + + Changes of mode are not supported by this class (fixed to constant score rewrite mode) + + + Lucene's package information, including version. * + + + The TermVectorMapper can be used to map Term Vectors into your own + structure instead of the parallel array structure used by + {@link Lucene.Net.Index.IndexReader#GetTermFreqVector(int,String)}. +

+ It is up to the implementation to make sure it is thread-safe. + + + +

+
+ + + true if this mapper should tell Lucene to ignore positions even if they are stored + + similar to ignoringPositions + + + + Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + This method will be called once before retrieving the vector for a field. + + This method will be called before {@link #Map(String,int,TermVectorOffsetInfo[],int[])}. + + The field the vector is for + + The number of terms that need to be mapped + + true if the mapper should expect offset information + + true if the mapper should expect positions info + + + + Map the Term Vector information into your own structure + The term to add to the vector + + The frequency of the term in the document + + null if the offset is not specified, otherwise the offset into the field of the term + + null if the position is not specified, otherwise the position in the field of the term + + + + Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they + can be skipped over. Derived classes should set this to true if they want to ignore positions. The default + is false, meaning positions will be loaded if they are stored. + + false + + + + + Same principal as {@link #IsIgnoringPositions()}, but applied to offsets. false by default. + + false + + + + Passes down the index of the document whose term vector is currently being mapped, + once for each top level call to a term vector reader. +

+ Default implementation IGNORES the document number. Override if your implementation needs the document number. +

+ NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. + +

+ index of document currently being mapped + +
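+ A minimal mapper along the lines described above, assuming the Lucene 2.9 Java API; it collects term frequencies into a plain Map instead of the parallel arrays:
+            import java.util.HashMap;
+            import java.util.Map;
+            import org.apache.lucene.index.TermVectorMapper;
+            import org.apache.lucene.index.TermVectorOffsetInfo;
+
+            class FreqMapMapper extends TermVectorMapper {
+              final Map freqs = new HashMap();   // term -> Integer frequency
+
+              public void setExpectations(String field, int numTerms,
+                                          boolean storeOffsets, boolean storePositions) {
+                // Nothing to pre-allocate for this simple mapper.
+              }
+
+              public void map(String term, int frequency,
+                              TermVectorOffsetInfo[] offsets, int[] positions) {
+                freqs.put(term, new Integer(frequency));
+              }
+            }
+ Such a mapper is then handed to the GetTermFreqVector overload that takes a TermVectorMapper, as described above.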
+ + The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}), + into a single Segment. After adding the appropriate readers, call the merge method to combine the + segments.

+ If the compoundFile flag is set, then the segments will be merged into a compound file. + + +

+ + + + +
+ + Maximum number of contiguous documents to bulk-copy + when merging stored fields + + + + norms header placeholder + + + This ctor used only by test code. + + + The Directory to merge the other segments into + + The name of the new segment + + + + Add an IndexReader to the collection of readers that are to be merged + + + + + + The index of the reader to return + + The ith reader to be merged + + + + Merges the readers specified by the {@link #add} method into the directory passed to the constructor + The number of documents that were merged + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Merges the readers specified by the {@link #add} method + into the directory passed to the constructor. + + if false, we will not merge the + stored fields nor vectors files + + The number of documents that were merged + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + close all IndexReaders that have been added. + Should not be called before merge(). + + IOException + + + + The number of documents in all of the readers + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Merge the TermVectors from each of the segments into the new one. + IOException + + + Process postings from multiple segments all positioned on the + same term. Writes out merged entries into freqOutput and + the proxOutput streams. + + + array of segments + + number of cells in the array actually occupied + + number of documents across all segments where this term was found + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Records the fact that roughly units amount of work + have been done since this method was last called. + When adding time-consuming code into SegmentMerger, + you should test different values for units to ensure + that the time in between calls to merge.checkAborted + is up to ~ 1 second. + + + + Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about + what Fields get loaded on a {@link Document} by {@link Lucene.Net.Index.IndexReader#Document(int,Lucene.Net.Documents.FieldSelector)} + + + + + + + the field to accept or reject + + an instance of {@link FieldSelectorResult} + if the {@link Field} named fieldName should be loaded. + + + + $Id + +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + IndexReader is an abstract class, providing an interface for accessing an + index. Search of an index is done entirely through this abstract interface, + so that any subclass which implements it is searchable. +

Concrete subclasses of IndexReader are usually constructed with a call to + one of the static open() methods, e.g. {@link + #Open(String, boolean)}. +

For efficiency, in this API documents are often referred to via + document numbers, non-negative integers which each name a unique + document in the index. These document numbers are ephemeral--they may change + as documents are added to and deleted from an index. Clients should thus not + rely on a given document having the same number between sessions. +

An IndexReader can be opened on a directory for which an IndexWriter is + opened already, but it cannot be used to delete documents from the index then. +

+ NOTE: for backwards API compatibility, several methods are not listed + as abstract, but have no useful implementations in this base class and + instead always throw UnsupportedOperationException. Subclasses are + strongly encouraged to override these methods, but in many cases may not + need to. +

+

+ NOTE: as of 2.4, it's possible to open a read-only + IndexReader using one of the static open methods that + accepts the boolean readOnly parameter. Such a reader has + better concurrency as it's not necessary to synchronize on + the isDeleted method. Currently the default for readOnly + is false, meaning if not specified you will get a + read/write IndexReader. But in 3.0 this default will + change to true, meaning you must explicitly specify false + if you want to make changes with the resulting IndexReader. +

+

NOTE: {@link + IndexReader} instances are completely thread + safe, meaning multiple threads can call any of its methods, + concurrently. If your application requires external + synchronization, you should not synchronize on the + IndexReader instance; use your own + (non-Lucene) objects instead. +

+ $Id: IndexReader.java 826049 2009-10-16 19:28:55Z mikemccand $ + +
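+ A short sketch of the read-only open recommended above (Lucene 2.9 Java API assumed; the index path is hypothetical):
+            import java.io.File;
+            import java.io.IOException;
+            import org.apache.lucene.index.IndexReader;
+            import org.apache.lucene.store.Directory;
+            import org.apache.lucene.store.FSDirectory;
+
+            class OpenReadOnly {
+              static IndexReader open() throws IOException {
+                Directory dir = FSDirectory.open(new File("/path/to/index"));   // hypothetical location
+                return IndexReader.open(dir, true);   // readOnly=true for better concurrency
+              }
+            }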
+ + Expert: returns the current refCount for this reader + + + Expert: increments the refCount of this IndexReader + instance. RefCounts are used to determine when a + reader can be closed safely, i.e. as soon as there are + no more references. Be sure to always call a + corresponding {@link #decRef}, in a finally clause; + otherwise the reader may never be closed. Note that + {@link #close} simply calls decRef(), which means that + the IndexReader will not really be closed until {@link + #decRef} has been called for all outstanding + references. + + + + + + + Expert: decreases the refCount of this IndexReader + instance. If the refCount drops to 0, then pending + changes (if any) are committed to the index and this + reader is closed. + + + IOException in case an IOException occurs in commit() or doClose() + + + + + + + will be deleted when IndexReader(Directory) is deleted + + + + + + Legacy Constructor for backwards compatibility. + +

+ This Constructor should not be used, it exists for backwards + compatibility only to support legacy subclasses that did not "own" + a specific directory, but needed to specify something to be returned + by the directory() method. Future subclasses should delegate to the + no arg constructor and implement the directory() method as appropriate. + +

+ Directory to be returned by the directory() method + + + + - use IndexReader() + +
+ + AlreadyClosedException if this IndexReader is closed + + + Returns a read/write IndexReader reading the index in an FSDirectory in the named + path. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #Open(Directory, boolean)} instead. + This method will be removed in the 3.0 release. + + + the path to the index directory + + + + Returns an IndexReader reading the index in an + FSDirectory in the named path. You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + the path to the index directory + + true if this should be a readOnly + reader + + Use {@link #Open(Directory, boolean)} instead. + This method will be removed in the 3.0 release. + + + + + Returns a read/write IndexReader reading the index in an FSDirectory in the named + path. + + the path to the index directory + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #Open(Directory, boolean)} instead. + This method will be removed in the 3.0 release. + + + + + Returns an IndexReader reading the index in an + FSDirectory in the named path. You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + the path to the index directory + + true if this should be a readOnly + reader + + Use {@link #Open(Directory, boolean)} instead. + This method will be removed in the 3.0 release. + + + + + Returns a read/write IndexReader reading the index in + the given Directory. + + the index directory + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #Open(Directory, boolean)} instead + This method will be removed in the 3.0 release. + + + + + Returns an IndexReader reading the index in the given + Directory. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the reader. + + the index directory + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns a read/write IndexReader reading the index in the given + {@link IndexCommit}. + + the commit point to open + + CorruptIndexException if the index is corrupt + Use {@link #Open(IndexCommit, boolean)} instead. + This method will be removed in the 3.0 release. + + + IOException if there is a low-level IO error + + + Expert: returns an IndexReader reading the index in the given + {@link IndexCommit}. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the reader. + + the commit point to open + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns a read/write IndexReader reading the index in the given + Directory, with a custom {@link IndexDeletionPolicy}. 
+ + the index directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see {@link IndexWriter} for details. + + Use {@link #Open(Directory, IndexDeletionPolicy, boolean)} instead. + This method will be removed in the 3.0 release. + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, with a custom {@link + IndexDeletionPolicy}. You should pass readOnly=true, + since it gives much better concurrent performance, + unless you intend to do write operations (delete + documents or change norms) with the reader. + + the index directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see {@link IndexWriter} for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, with a custom {@link + IndexDeletionPolicy}. You should pass readOnly=true, + since it gives much better concurrent performance, + unless you intend to do write operations (delete + documents or change norms) with the reader. + + the index directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see {@link IndexWriter} for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + Subsamples which indexed + terms are loaded into RAM. This has the same effect as {@link + IndexWriter#setTermIndexInterval} except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns a read/write IndexReader reading the index in the given + Directory, using a specific commit and with a custom + {@link IndexDeletionPolicy}. + + the specific {@link IndexCommit} to open; + see {@link IndexReader#listCommits} to list all commits + in a directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see {@link IndexWriter} for details. + + Use {@link #Open(IndexCommit, IndexDeletionPolicy, boolean)} instead. + This method will be removed in the 3.0 release. + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, using a specific commit and with + a custom {@link IndexDeletionPolicy}. You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + the specific {@link IndexCommit} to open; + see {@link IndexReader#listCommits} to list all commits + in a directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see {@link IndexWriter} for details. 
+ + true if no changes (deletions, norms) will be made with this IndexReader + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Expert: returns an IndexReader reading the index in + the given Directory, using a specific commit and with + a custom {@link IndexDeletionPolicy}. You should pass + readOnly=true, since it gives much better concurrent + performance, unless you intend to do write operations + (delete documents or change norms) with the reader. + + the specific {@link IndexCommit} to open; + see {@link IndexReader#listCommits} to list all commits + in a directory + + a custom deletion policy (only used + if you use this reader to perform deletes or to set + norms); see {@link IndexWriter} for details. + + true if no changes (deletions, norms) will be made with this IndexReader + + Subsambles which indexed + terms are loaded into RAM. This has the same effect as {@link + IndexWriter#setTermIndexInterval} except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1. Set this + to -1 to skip loading the terms index entirely. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Refreshes an IndexReader if the index has changed since this instance + was (re)opened. +

+ Opening an IndexReader is an expensive operation. This method can be used + to refresh an existing IndexReader to reduce these costs. This method + tries to only load segments that have changed or were created after the + IndexReader was (re)opened. +

+ If the index has not changed since this instance was (re)opened, then this + call is a NOOP and returns this instance. Otherwise, a new instance is + returned. The old instance is not closed and remains usable.
+

+ If the reader is reopened, even though they share + resources internally, it's safe to make changes + (deletions, norms) with the new reader. All shared + mutable state obeys "copy on write" semantics to ensure + the changes are not seen by other readers. +

+ You can determine whether a reader was actually reopened by comparing the + old instance with the instance returned by this method: +

+            IndexReader reader = ... 
+            ...
+            IndexReader newReader = reader.reopen();
+            if (newReader != reader) {
+              ...     // reader was reopened
+              reader.close(); 
+            }
+            reader = newReader;
+            ...
+            
+ + Be sure to synchronize that code so that other threads, + if present, can never use reader after it has been + closed and before it's switched to newReader. + +

NOTE: If this reader is a near real-time + reader (obtained from {@link IndexWriter#GetReader()}, + reopen() will simply call writer.getReader() again for + you, though this may change in the future. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Just like {@link #Reopen()}, except you can change the + readOnly of the original reader. If the index is + unchanged but readOnly is different then a new reader + will be returned. + + + + Expert: reopen this reader on a specific commit point. + This always returns a readOnly reader. If the + specified commit point matches what this reader is + already on, and this reader is already readOnly, then + this same instance is returned; if it is not already + readOnly, a readOnly clone is returned. + + + + Efficiently clones the IndexReader (sharing most + internal state). +

+ On cloning a reader with pending changes (deletions, + norms), the original reader transfers its write lock to + the cloned reader. This means only the cloned reader + may make further changes to the index, and commit the + changes to the index on close, but the old reader still + reflects all changes made up until it was cloned. +

+ Like {@link #Reopen()}, it's safe to make changes to + either the original or the cloned reader: all shared + mutable state obeys "copy on write" semantics to ensure + the changes are not seen by other readers. +

+

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Clones the IndexReader and optionally changes readOnly. A readOnly + reader cannot open a writeable reader. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns the directory associated with this index. The Default + implementation returns the directory specified by subclasses when + delegating to the IndexReader(Directory) constructor, or throws an + UnsupportedOperationException if one was not specified. + + UnsupportedOperationException if no directory + + + Returns the time the index in the named directory was last modified. + Do not use this to check whether the reader is still up-to-date, use + {@link #IsCurrent()} instead. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #LastModified(Directory)} instead. + This method will be removed in the 3.0 release. + + + + Returns the time the index in the named directory was last modified. + Do not use this to check whether the reader is still up-to-date, use + {@link #IsCurrent()} instead. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #LastModified(Directory)} instead. + This method will be removed in the 3.0 release. + + + + + Returns the time the index in the named directory was last modified. + Do not use this to check whether the reader is still up-to-date, use + {@link #IsCurrent()} instead. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Reads version number from segments files. The version number is + initialized with a timestamp and then increased by one for each change of + the index. + + + where the index resides. + + version number. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #GetCurrentVersion(Directory)} instead. + This method will be removed in the 3.0 release. + + + + Reads version number from segments files. The version number is + initialized with a timestamp and then increased by one for each change of + the index. + + + where the index resides. + + version number. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #GetCurrentVersion(Directory)} instead. + This method will be removed in the 3.0 release. + + + + Reads version number from segments files. The version number is + initialized with a timestamp and then increased by one for each change of + the index. + + + where the index resides. + + version number. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Reads commitUserData, previously passed to {@link + IndexWriter#Commit(Map)}, from current index + segments file. This will return null if {@link + IndexWriter#Commit(Map)} has never been called for + this index. + + + where the index resides. + + commit userData. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + + + + + Version number when this IndexReader was opened. Not implemented in the + IndexReader base class. + +

+ If this reader is based on a Directory (ie, was created by calling + {@link #Open}, or {@link #Reopen} on a reader based on a Directory), then + this method returns the version recorded in the commit that the reader + opened. This version is advanced every time {@link IndexWriter#Commit} is + called. +

+ +

+ If instead this reader is a near real-time reader (ie, obtained by a call + to {@link IndexWriter#GetReader}, or by calling {@link #Reopen} on a near + real-time reader), then this method returns the version of the last + commit done by the writer. Note that even as further changes are made + with the writer, the version will not changed until a commit is + completed. Thus, you should not rely on this method to determine when a + near real-time reader should be opened. Use {@link #IsCurrent} instead. +

+ +

+ UnsupportedOperationException + unless overridden in subclass + +
+ + Retrieve the String userData optionally passed to + IndexWriter#commit. This will return null if {@link + IndexWriter#Commit(Map)} has never been called for + this index. + + + + + + +

For IndexReader implementations that use + TermInfosReader to read terms, this sets the + indexDivisor to subsample the number of indexed terms + loaded into memory. This has the same effect as {@link + IndexWriter#setTermIndexInterval} except that setting + must be done at indexing time while this setting can be + set per reader. When set to N, then one in every + N*termIndexInterval terms in the index is loaded into + memory. By setting this to a value > 1 you can reduce + memory usage, at the expense of higher latency when + loading a TermInfo. The default value is 1.

+ + NOTE: you must call this before the term + index is loaded. If the index is already loaded, + an IllegalStateException is thrown. +

+ IllegalStateException if the term index has already been loaded into memory + Please use {@link IndexReader#Open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead. + +
+ +

For IndexReader implementations that use + TermInfosReader to read terms, this returns the + current indexDivisor as specified when the reader was + opened. +

+
+ + Check whether any new changes have occurred to the index since this + reader was opened. + +

+ If this reader is based on a Directory (ie, was created by calling + {@link #open}, or {@link #reopen} on a reader based on a Directory), then + this method checks if any further commits (see {@link IndexWriter#commit} + have occurred in that directory). +

+ +

+ If instead this reader is a near real-time reader (ie, obtained by a call + to {@link IndexWriter#getReader}, or by calling {@link #reopen} on a near + real-time reader), then this method checks if either a new commit has + occurred, or any new uncommitted changes have taken place via the writer. + Note that even if the writer has only performed merging, this method will + still return false.

+ +

+ In any event, if this returns false, you should call {@link #reopen} to + get a new reader that sees the changes. +

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + UnsupportedOperationException unless overridden in subclass +
+ + Checks is the index is optimized (if it has a single segment and + no deletions). Not implemented in the IndexReader base class. + + true if the index is optimized; false otherwise + + UnsupportedOperationException unless overridden in subclass + + + Return an array of term frequency vectors for the specified document. + The array contains a vector for each vectorized field in the document. + Each vector contains terms and frequencies for all terms in a given vectorized field. + If no such fields existed, the method returns null. The term vectors that are + returned may either be of type {@link TermFreqVector} + or of type {@link TermPositionVector} if + positions or offsets have been stored. + + + document for which term frequency vectors are returned + + array of term frequency vectors. May be null if no term vectors have been + stored for the specified document. + + IOException if index cannot be accessed + + + + + Return a term frequency vector for the specified document and field. The + returned vector contains terms and frequencies for the terms in + the specified field of this document, if the field had the storeTermVector + flag set. If termvectors had been stored with positions or offsets, a + {@link TermPositionVector} is returned. + + + document for which the term frequency vector is returned + + field for which the term frequency vector is returned. + + term frequency vector May be null if field does not exist in the specified + document or term vector was not stored. + + IOException if index cannot be accessed + + + + + Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of + the {@link TermFreqVector}. + + The number of the document to load the vector for + + The name of the field to load + + The {@link TermVectorMapper} to process the vector. Must not be null + + IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. + + + + + Map all the term vectors for all fields in a Document + The number of the document to load the vector for + + The {@link TermVectorMapper} to process the vector. Must not be null + + IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. + + + Returns true if an index exists at the specified directory. + If the directory does not exist or if there is no index in it. + false is returned. + + the directory to check for an index + + true if an index exists; false otherwise + + Use {@link #IndexExists(Directory)} instead + This method will be removed in the 3.0 release. + + + + + Returns true if an index exists at the specified directory. + If the directory does not exist or if there is no index in it. + + the directory to check for an index + + true if an index exists; false otherwise + + Use {@link #IndexExists(Directory)} instead. + This method will be removed in the 3.0 release. + + + + + Returns true if an index exists at the specified directory. + If the directory does not exist or if there is no index in it. + + the directory to check for an index + + true if an index exists; false otherwise + + IOException if there is a problem with accessing the index + + + Returns the number of documents in this index. + + + Returns one greater than the largest possible document number. + This may be used to, e.g., determine how big to allocate an array which + will have an element for every document number in an index. + + + + Returns the number of deleted documents. 
+ + + Returns the stored fields of the nth + Document in this index. +

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required, however you + can call {@link #IsDeleted(int)} with the requested document ID to verify + the document is not deleted. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Get the {@link Lucene.Net.Documents.Document} at the nth position. The {@link FieldSelector} may be used to determine + what {@link Lucene.Net.Documents.Field}s to load and how they should + be loaded. NOTE: If this Reader (more specifically, the underlying + FieldsReader) is closed before the lazy + {@link Lucene.Net.Documents.Field} is loaded, an exception may be + thrown. If you want the value of a lazy + {@link Lucene.Net.Documents.Field} to be available after closing, you + must explicitly load it or fetch the Document again with a new loader.

+ NOTE: for performance reasons, this method does not check if the + requested document is deleted, and therefore asking for a deleted document + may yield unspecified results. Usually this is not required, however you + can call {@link #IsDeleted(int)} with the requested document ID to verify + the document is not deleted. + +

+ Get the document at the nth position + + The {@link FieldSelector} to use to determine what + Fields should be loaded on the Document. May be null, in which case + all Fields will be loaded. + + The stored fields of the + {@link Lucene.Net.Documents.Document} at the nth position + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + + + + + + +
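+ A sketch of a custom FieldSelector for the method above (Lucene 2.9 Java API assumed); it eagerly loads only a hypothetical "title" field and skips everything else:
+            import java.io.IOException;
+            import org.apache.lucene.document.Document;
+            import org.apache.lucene.document.FieldSelector;
+            import org.apache.lucene.document.FieldSelectorResult;
+            import org.apache.lucene.index.IndexReader;
+
+            class TitleOnlySelector implements FieldSelector {
+              public FieldSelectorResult accept(String fieldName) {
+                return "title".equals(fieldName) ? FieldSelectorResult.LOAD
+                                                 : FieldSelectorResult.NO_LOAD;
+              }
+            }
+
+            class TitleOnlyLoad {
+              static Document load(IndexReader reader, int docNum) throws IOException {
+                return reader.document(docNum, new TitleOnlySelector());
+              }
+            }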
+ + Returns true if document n has been deleted + + + Returns true if any documents have been deleted + + + Returns true if there are norms stored for this field. + + + Returns the byte-encoded normalization factor for the named field of + every document. This is used by the search code to score documents. + + + + + + + Reads the byte-encoded normalization factor for the named field of every + document. This is used by the search code to score documents. + + + + + + + Expert: Resets the normalization factor for the named field of the named + document. The norm represents the product of the field's {@link + Lucene.Net.Documents.Fieldable#SetBoost(float) boost} and its {@link Similarity#LengthNorm(String, + int) length normalization}. Thus, to preserve the length normalization + values when resetting this, one should base the new value upon the old. + + NOTE: If this field does not store norms, then + this method call will silently do nothing. + + + + + + + StaleReaderException if the index has changed + since this reader was opened + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Implements setNorm in subclass. + + + Expert: Resets the normalization factor for the named field of the named + document. + + + + + + + + StaleReaderException if the index has changed + since this reader was opened + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Returns an enumeration of all the terms in the index. The + enumeration is ordered by Term.compareTo(). Each term is greater + than all that precede it in the enumeration. Note that after + calling terms(), {@link TermEnum#Next()} must be called + on the resulting enumeration before calling other methods such as + {@link TermEnum#Term()}. + + IOException if there is a low-level IO error + + + Returns an enumeration of all terms starting at a given term. If + the given term does not exist, the enumeration is positioned at the + first term greater than the supplied term. The enumeration is + ordered by Term.compareTo(). Each term is greater than all that + precede it in the enumeration. + + IOException if there is a low-level IO error + + + Returns the number of documents containing the term t. + IOException if there is a low-level IO error + + + Returns an enumeration of all the documents which contain + term. For each document, the document number, the frequency of + the term in that document is also provided, for use in + search scoring. If term is null, then all non-deleted + docs are returned with freq=1. + Thus, this method implements the mapping: +

    Term    =>    <docNum, freq>*
+

The enumeration is ordered by document number. Each document number + is greater than all that precede it in the enumeration. +

+ IOException if there is a low-level IO error +
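+ 
+ A hedged C# sketch (not from the original documentation) of consuming this
+ enumeration; the open reader and the "body" field are assumptions:
+ 
+            TermDocs termDocs = reader.TermDocs(new Term("body", "lucene"));
+            while (termDocs.Next())
+            {
+                int docNum = termDocs.Doc();   // document number
+                int freq = termDocs.Freq();    // frequency of the term in that document
+                System.Console.WriteLine(docNum + " -> " + freq);
+            }
+            termDocs.Close();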
+ + Returns an unpositioned {@link TermDocs} enumerator. + IOException if there is a low-level IO error + + + Returns an enumeration of all the documents which contain + term. For each document, in addition to the document number + and frequency of the term in that document, a list of all of the ordinal + positions of the term in the document is available. Thus, this method + implements the mapping: + +

    + Term    =>    <docNum, freq, + <pos1, pos2, ... + posfreq-1> + >* +
+

This positional information facilitates phrase and proximity searching. +

The enumeration is ordered by document number. Each document number is + greater than all that precede it in the enumeration. +

+ IOException if there is a low-level IO error +
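+ 
+ A similar illustrative C# sketch (same assumptions as the TermDocs example above)
+ that also reads the ordinal positions:
+ 
+            TermPositions tp = reader.TermPositions(new Term("body", "lucene"));
+            while (tp.Next())
+            {
+                System.Console.Write("doc " + tp.Doc() + ":");
+                for (int i = 0; i < tp.Freq(); i++)
+                {
+                    System.Console.Write(" " + tp.NextPosition());   // position within the document
+                }
+                System.Console.WriteLine();
+            }
+            tp.Close();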
+ + Returns an unpositioned {@link TermPositions} enumerator. + IOException if there is a low-level IO error + + + Deletes the document numbered docNum. Once a document is + deleted it will not appear in TermDocs or TermPostitions enumerations. + Attempts to read its field with the {@link #document} + method will result in an error. The presence of this document may still be + reflected in the {@link #docFreq} statistic, though + this will be corrected eventually as the index is further modified. + + + StaleReaderException if the index has changed + since this reader was opened + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Implements deletion of the document numbered docNum. + Applications should call {@link #DeleteDocument(int)} or {@link #DeleteDocuments(Term)}. + + + + Deletes all documents that have a given term indexed. + This is useful if one uses a document field to hold a unique ID string for + the document. Then to delete such a document, one merely constructs a + term with the appropriate field and the unique ID string as its text and + passes it to this method. + See {@link #DeleteDocument(int)} for information about when this deletion will + become effective. + + + the number of documents deleted + + StaleReaderException if the index has changed + since this reader was opened + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Undeletes all documents currently marked as deleted in this index. + + + StaleReaderException if the index has changed + since this reader was opened + + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Implements actual undeleteAll() in subclass. + + + Does nothing by default. Subclasses that require a write lock for + index modifications must implement this method. + + + + + IOException + + + Opaque Map (String -> String) + that's recorded into the segments file in the index, + and retrievable by {@link + IndexReader#getCommitUserData}. + + IOException + + + Commit changes resulting from delete, undeleteAll, or + setNorm operations + + If an exception is hit, then either no changes or all + changes will have been committed to the index + (transactional semantics). + + IOException if there is a low-level IO error + + + Commit changes resulting from delete, undeleteAll, or + setNorm operations + + If an exception is hit, then either no changes or all + changes will have been committed to the index + (transactional semantics). + + IOException if there is a low-level IO error + + + Implements commit. + Please implement {@link #DoCommit(Map) + instead}. + + + + Implements commit. NOTE: subclasses should override + this. In 3.0 this will become an abstract method. + + + + Closes files associated with this index. + Also saves any new deletions to disk. + No other methods should be called after this has been called. + + IOException if there is a low-level IO error + + + Implements close. + + + Get a list of unique field names that exist in this index and have the specified + field option information. 
+ + specifies which field option should be available for the returned fields + + Collection of Strings indicating the names of the fields. + + + + + + Returns true iff the index in the named directory is + currently locked. + + the directory to check for a lock + + IOException if there is a low-level IO error + Please use {@link IndexWriter#IsLocked(Directory)} instead. + This method will be removed in the 3.0 release. + + + + + Returns true iff the index in the named directory is + currently locked. + + the directory to check for a lock + + IOException if there is a low-level IO error + Use {@link #IsLocked(Directory)} instead. + This method will be removed in the 3.0 release. + + + + + Forcibly unlocks the index in the named directory. +

+ Caution: this should only be used by failure recovery code, + when it is known that no other process nor thread is in fact + currently accessing this index. +

+ Please use {@link IndexWriter#Unlock(Directory)} instead. + This method will be removed in the 3.0 release. + + +
+ + Expert: return the IndexCommit that this reader has + opened. This method is only implemented by those + readers that correspond to a Directory with its own + segments_N file. + +

WARNING: this API is new and experimental and + may suddenly change.

+

+
+ + Prints the filename and size of each file within a given compound file. + Add the -extract flag to extract files to the current working directory. + In order to make the extracted version of the index work, you have to copy + the segments file from the compound index into the directory where the extracted files are stored. + + Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile> + + + + Returns all commit points that exist in the Directory. + Normally, because the default is {@link + KeepOnlyLastCommitDeletionPolicy}, there would be only + one commit point. But if you're using a custom {@link + IndexDeletionPolicy} then there could be many commits. + Once you have a given commit, you can open a reader on + it by calling {@link IndexReader#Open(IndexCommit)} + There must be at least one commit in + the Directory, else this method throws {@link + java.io.IOException}. Note that if a commit is in + progress while this method is running, that commit + may or may not be returned array. + + + + Expert: returns the sequential sub readers that this + reader is logically composed of. For example, + IndexSearcher uses this API to drive searching by one + sub reader at a time. If this reader is not composed + of sequential child readers, it should return null. + If this method returns an empty array, that means this + reader is a null reader (for example a MultiReader + that has no sub readers). +

+ NOTE: You should not try using sub-readers returned by + this method to make any changes (setNorm, deleteDocument, + etc.). While this might succeed for one composite reader + (like MultiReader), it will most likely lead to index + corruption for other readers (like DirectoryReader obtained + through {@link #open}). Use the parent reader directly. +

+
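+ 
+ A small read-only C# sketch (an illustration under the assumption that reader is
+ already open; not part of the original docs):
+ 
+            IndexReader[] subReaders = reader.GetSequentialSubReaders();
+            if (subReaders == null)
+            {
+                // the reader is atomic; treat it as its own single leaf
+                subReaders = new IndexReader[] { reader };
+            }
+            foreach (IndexReader leaf in subReaders)
+            {
+                System.Console.WriteLine(leaf.MaxDoc());   // inspection only, per the note above
+            }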
+ + Expert + + + + + Returns the number of unique terms (across all fields) + in this reader. + + This method returns long, even though internally + Lucene cannot handle more than 2^31 unique terms, for + a possible future when this limitation is removed. + + + UnsupportedOperationException if this count + cannot be easily determined (eg Multi*Readers). + Instead, you should call {@link + #getSequentialSubReaders} and ask each sub reader for + its unique term count. + + + + Expert: Return the state of the flag that disables fakes norms in favor of representing the absence of field norms with null. + true if fake norms are disabled + + This currently defaults to false (to remain + back-compatible), but in 3.0 it will be hardwired to + true, meaning the norms() methods will return null for + fields that had disabled norms. + + + + Expert: Set the state of the flag that disables fakes norms in favor of representing the absence of field norms with null. + true to disable fake norms, false to preserve the legacy behavior + + This currently defaults to false (to remain + back-compatible), but in 3.0 it will be hardwired to + true, meaning the norms() methods will return null for + fields that had disabled norms. + + + + Utility class for executing code that needs to do + something with the current segments file. This is + necessary with lock-less commits because from the time + you locate the current segments file name, until you + actually open it, read its contents, or check modified + time, etc., it could have been deleted due to a writer + commit finishing. + + + + A collection of segmentInfo objects with methods for operating on + those segments in relation to the file system. + +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + The file format version, a negative number. + + + This format adds details used for lockless commits. It differs + slightly from the previous format in that file names + are never re-used (write once). Instead, each file is + written to the next generation. For example, + segments_1, segments_2, etc. This allows us to not use + a commit lock. See file + formats for details. + + + + This format adds a "hasSingleNormFile" flag into each segment info. + See LUCENE-756 + for details. + + + + This format allows multiple segments to share a single + vectors and stored fields file. + + + + This format adds a checksum at the end of the file to + ensure all bytes were successfully written. + + + + This format adds the deletion count for each segment. + This way IndexWriter can efficiently report numDocs(). + + + + This format adds the boolean hasProx to record if any + fields in the segment store prox information (ie, have + omitTermFreqAndPositions==false) + + + + This format adds optional commit userData (String) storage. + + + This format adds optional per-segment String + dianostics storage, and switches userData to Map + + + + counts how often the index has been changed by adding or deleting docs. + starting with the current time in milliseconds forces to create unique version numbers. + + + + If non-null, information about loading segments_N files + + + + + Get the generation (N) of the current segments_N file + from a list of files. + + + -- array of file names to check + + + + Get the generation (N) of the current segments_N file + in the directory. + + + -- directory to search for the latest segments_N file + + + + Get the filename of the current segments_N file + from a list of files. + + + -- array of file names to check + + + + Get the filename of the current segments_N file + in the directory. + + + -- directory to search for the latest segments_N file + + + + Get the segments_N filename in use by this segment infos. + + + Parse the generation off the segments file name and + return it. + + + + Get the next segments_N filename that will be written. + + + Read a particular segmentFileName. Note that this may + throw an IOException if a commit is in process. + + + -- directory containing the segments file + + -- segment file to load + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + This version of read uses the retry logic (for lock-less + commits) to find the right segments file to load. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns a copy of this instance, also copying each + SegmentInfo. + + + + version number when this SegmentInfos was generated. + + + Current version number from segments file. + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns userData from latest segments file + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + If non-null, information about retries when loading + the segments file will be printed to this. + + + + Advanced: set how many times to try loading the + segments.gen file contents to determine current segment + generation. This file is only referenced when the + primary method (listing the directory) fails. + + + + + + + + Advanced: set how many milliseconds to pause in between + attempts to load the segments.gen file. + + + + + + + + Advanced: set how many times to try incrementing the + gen when loading the segments file. 
This only runs if + the primary (listing directory) and secondary (opening + segments.gen file) methods fail to find the segments + file. + + + + + + + + + + + + Returns a new SegmentInfos containg the SegmentInfo + instances in the specified range first (inclusive) to + last (exclusive), so total number of segments returned + is last-first. + + + + Call this to start a commit. This writes the new + segments file, but writes an invalid checksum at the + end, so that it is not visible to readers. Once this + is called you must call {@link #finishCommit} to complete + the commit or {@link #rollbackCommit} to abort it. + + + + Returns all file names referenced by SegmentInfo + instances matching the provided Directory (ie files + associated with any "external" segments are skipped). + The returned collection is recomputed on each + invocation. + + + + Writes & syncs to the Directory dir, taking care to + remove the segments file on exception + + + + Replaces all segments in this instance, but keeps + generation, version, counter so that future commits + remain write once. + + + + + Simple brute force implementation. + If size is equal, compare items one by one. + + SegmentInfos object to check equality for + true if lists are equal, false otherwise + + + + Calculate hash code of SegmentInfos + + hash code as in java version of ArrayList + + + Utility class for executing code that needs to do + something with the current segments file. This is + necessary with lock-less commits because from the time + you locate the current segments file name, until you + actually open it, read its contents, or check modified + time, etc., it could have been deleted due to a writer + commit finishing. + + + + Subclass must implement this. The assumption is an + IOException will be thrown if something goes wrong + during the processing that could have been caused by + a writer committing. + + + + Constants describing field properties, for example used for + {@link IndexReader#GetFieldNames(FieldOption)}. + + + + All fields + + + All indexed fields + + + All fields that store payloads + + + All fields that omit tf + + + Renamed to {@link #OMIT_TERM_FREQ_AND_POSITIONS} + + + + All fields which are not indexed + + + All fields which are indexed with termvectors enabled + + + All fields which are indexed but don't have termvectors enabled + + + All fields with termvectors enabled. Please note that only standard termvector fields are returned + + + All fields with termvectors with position values enabled + + + All fields with termvectors with offset values enabled + + + All fields with termvectors with offset values and position values enabled + + + The class which implements SegmentReader. + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Clones the norm bytes. May be overridden by subclasses. New and experimental. + Byte array to clone + + New BitVector + + + + Clones the deleteDocs BitVector. May be overridden by subclasses. New and experimental. + BitVector to clone + + New BitVector + + + + + + + + + + + + Read norms into a pre-allocated array. + + + Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. 
+ TermVectorsReader + + + + Return a term frequency vector for the specified document and field. The + vector returned contains term numbers and frequencies for all terms in + the specified field of this document, if the field had storeTermVector + flag set. If the flag was not set, the method returns null. + + IOException + + + Return an array of term frequency vectors for the specified document. + The array contains a vector for each vectorized field in the document. + Each vector vector contains term numbers and frequencies for all terms + in a given vectorized field. + If no such fields existed, the method returns null. + + IOException + + + Return the name of the segment this reader is reading. + + + Return the SegmentInfo of the segment this reader is reading. + + + Returns the directory this index resides in. + + + Lotsa tests did hacks like:
+ SegmentReader reader = (SegmentReader) IndexReader.open(dir);
+ They broke. This method serves as a hack to keep hacks working +
+
+ + Sets the initial value + + + Java's builtin ThreadLocal has a serious flaw: + it can take an arbitrarily long amount of time to + dereference the things you had stored in it, even once the + ThreadLocal instance itself is no longer referenced. + This is because there is single, master map stored for + each thread, which all ThreadLocals share, and that + master map only periodically purges "stale" entries. + + While not technically a memory leak, because eventually + the memory will be reclaimed, it can take a long time + and you can easily hit OutOfMemoryError because from the + GC's standpoint the stale entries are not reclaimaible. + + This class works around that, by only enrolling + WeakReference values into the ThreadLocal, and + separately holding a hard reference to each stored + value. When you call {@link #close}, these hard + references are cleared and then GC is freely able to + reclaim space by objects stored in it. + + + + Byte[] referencing is used because a new norm object needs + to be created for each clone, and the byte array is all + that is needed for sharing between cloned readers. The + current norm referencing is for sharing between readers + whereas the byte[] referencing is for copy on write which + is independent of reader references (i.e. incRef, decRef). + + + +

Expert: {@link IndexWriter} uses an instance + implementing this interface to execute the merges + selected by a {@link MergePolicy}. The default + MergeScheduler is {@link ConcurrentMergeScheduler}.

+ +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+ +

NOTE: This class typically requires access to + package-private APIs (eg, SegmentInfos) to do its job; + if you implement your own MergePolicy, you'll need to put + it in package Lucene.Net.Index in order to use + these APIs. +

+
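+ 
+ For illustration only (dir and analyzer are assumed to exist; this is a sketch,
+ not a prescribed setup), an alternative scheduler can be installed on the writer:
+ 
+            IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
+            // Replace the default ConcurrentMergeScheduler so merges run on the calling thread.
+            writer.SetMergeScheduler(new SerialMergeScheduler());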
+ + Run the merges provided by {@link IndexWriter#GetNextMerge()}. + + + Close this MergeScheduler. + + + Useful constants representing filenames and extensions used by lucene + + + $rcs = ' $Id: Exp $ ' ; + + + + Name of the index segment file + + + Name of the generation reference file name + + + Name of the index deletable file (only used in + pre-lockless indices) + + + + Extension of norms file + + + Extension of freq postings file + + + Extension of prox postings file + + + Extension of terms file + + + Extension of terms index file + + + Extension of stored fields index file + + + Extension of stored fields file + + + Extension of vectors fields file + + + Extension of vectors documents file + + + Extension of vectors index file + + + Extension of compound file + + + Extension of compound file for doc store files + + + Extension of deletes + + + Extension of field infos + + + Extension of plain norms + + + Extension of separate norms + + + Extension of gen file + + + This array contains all filename extensions used by + Lucene's index files, with two exceptions, namely the + extension made up from .f + a number and + from .s + a number. Also note that + Lucene's segments_N files do not have any + filename extension. + + + + File extensions that are added to a compound file + (same as above, minus "del", "gen", "cfs"). + + + + File extensions of old-style index files + + + File extensions for term vector support + + + Computes the full file name from base, extension and + generation. If the generation is -1, the file name is + null. If it's 0, the file name is + If it's > 0, the file name is + + + -- main part of the file name + + -- extension of the filename (including .) + + -- generation + + + + Returns true if the provided filename is one of the doc + store files (ends with an extension in + STORE_INDEX_EXTENSIONS). + + + + This is the base class for an in-memory posting list, + keyed by a Token. {@link TermsHash} maintains a hash + table holding one instance of this per unique Token. + Consumers of TermsHash ({@link TermsHashConsumer}) must + subclass this class with its own concrete class. + FreqProxTermsWriter.PostingList is a private inner class used + for the freq/prox postings, and + TermVectorsTermsWriter.PostingList is a private inner class + used to hold TermVectors postings. + + + + NOTE: this API is experimental and will likely change + + + Adds a new term in this field; term ends with U+FFFF + char + + + + Called when we are done adding terms to this field + + + Adds a new term in this field + + + Called when we are done adding terms to this field + + + Class responsible for access to stored document fields. +

+ It uses <segment>.fdt and <segment>.fdx files. + +

+ $Id: FieldsReader.java 801344 2009-08-05 18:05:06Z yonik $ + +
+ + Returns a cloned FieldsReader that shares open + IndexInputs with the original one. It is the caller's + job not to close the original FieldsReader until all + clones are called (eg, currently SegmentReader manages + this logic). + + + + AlreadyClosedException if this FieldsReader is closed + + + Closes the underlying {@link Lucene.Net.Store.IndexInput} streams, including any ones associated with a + lazy implementation of a Field. This means that the Fields values will not be accessible. + + + IOException + + + Returns the length in bytes of each raw document in a + contiguous range of length numDocs starting with + startDocID. Returns the IndexInput (the fieldStream), + already seeked to the starting point for startDocID. + + + + Skip the field. We still have to read some of the information about the field, but can skip past the actual content. + This will have the most payoff on large fields. + + + + A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is + loaded. + + + + + + + + + + Synonymous with {@link Field}. + +

WARNING: This interface may change within minor versions, despite Lucene's backward compatibility requirements. + This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards + compatibility promises remain intact. For example, Lucene can still + read and write indices created within the same major version. +

+ + +

+
+ + Sets the boost factor for hits on this field. This value will be + multiplied into the score of all hits on this field of this + document. + +

The boost is multiplied by {@link Lucene.Net.Documents.Document#GetBoost()} of the document + containing this field. If a document has multiple fields with the same + name, all such values are multiplied together. This product is then + used to compute the norm factor for the field. By + default, in the {@link + Lucene.Net.Search.Similarity#ComputeNorm(String, + FieldInvertState)} method, the boost value is multiplied + by the {@link + Lucene.Net.Search.Similarity#LengthNorm(String, + int)} and then rounded by {@link Lucene.Net.Search.Similarity#EncodeNorm(float)} before it is stored in the + index. One should attempt to ensure that this product does not overflow + the range of that encoding. + +

+ + + + + + +
+ + Returns the boost factor for hits for this field. + +

The default value is 1.0. + +

Note: this value is not stored directly with the document in the index. + Documents returned from {@link Lucene.Net.Index.IndexReader#Document(int)} and + {@link Lucene.Net.Search.Hits#Doc(int)} may thus not have the same value present as when + this field was indexed. + +

+ + +
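+ 
+ A short illustrative C# sketch (field name and value are assumptions, not part of
+ the original docs) of setting and reading a field boost at indexing time:
+ 
+            Field title = new Field("title", "Apache Lucene.Net", Field.Store.YES, Field.Index.ANALYZED);
+            title.SetBoost(2.0f);             // boost hits on this field
+            float boost = title.GetBoost();   // still 2.0f here; not retrievable after indexing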
+ + Returns the name of the field as an interned string. + For example "date", "title", "body", ... + + + + The value of the field as a String, or null. +

+ For indexing, if isStored()==true, the stringValue() will be used as the stored field value + unless isBinary()==true, in which case binaryValue() will be used. + + If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. + If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null, + else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens. +

+
+ + The value of the field as a Reader, which can be used at index time to generate indexed tokens. + + + + + The value of the field in Binary, or null. + + + + + The TokenStream for this field to be used when indexing, or null. + + + + + True if the value of the field is to be stored in the index for return + with search hits. + + + + True if the value of the field is to be indexed, so that it may be + searched on. + + + + True if the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. + + + + True if the value of the field is stored and compressed within the index + + + True if the term or terms used to index this field are stored as a term + vector, available from {@link Lucene.Net.Index.IndexReader#GetTermFreqVector(int,String)}. + These methods do not provide access to the original content of the field, + only to terms used to index it. If the original content must be + preserved, use the stored attribute instead. + + + + + + + True if terms are stored as term vector together with their offsets + (start and end positon in source text). + + + + True if terms are stored as term vector together with their token positions. + + + True if the value of the field is stored as binary + + + True if norms are omitted for this indexed field + + + Expert: + + If set, omit normalization factors associated with this indexed field. + This effectively disables indexing boosts and length normalization for this field. + + + + Renamed to {@link AbstractField#setOmitTermFreqAndPositions} + + + + Renamed to {@link AbstractField#getOmitTermFreqAndPositions} + + + + Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving + it's values via {@link #StringValue()} or {@link #BinaryValue()} is only valid as long as the {@link Lucene.Net.Index.IndexReader} that + retrieved the {@link Document} is still open. + + + true if this field can be loaded lazily + + + + Returns offset into byte[] segment that is used as value, if Field is not binary + returned value is undefined + + index of the first character in byte[] segment that represents this Field value + + + + Returns length of byte[] segment that is used as value, if Field is not binary + returned value is undefined + + length of byte[] segment that represents this Field value + + + + Return the raw byte[] for the binary field. Note that + you must also call {@link #getBinaryLength} and {@link + #getBinaryOffset} to know which range of bytes in this + returned array belong to the field. + + reference to the Field value as byte[]. + + + + Return the raw byte[] for the binary field. Note that + you must also call {@link #getBinaryLength} and {@link + #getBinaryOffset} to know which range of bytes in this + returned array belong to the field.

+ About reuse: if you pass in the result byte[] and it is + used, likely the underlying implementation will hold + onto this byte[] and return it in future calls to + {@link #BinaryValue()} or {@link #GetBinaryValue()}. + So if you subsequently re-use the same byte[] elsewhere + it will alter this Fieldable's value. +

+ User defined buffer that will be used if + possible. If this is null or not large enough, a new + buffer is allocated + + reference to the Field value as byte[]. + +
+ + Sets the boost factor hits on this field. This value will be + multiplied into the score of all hits on this this field of this + document. + +

The boost is multiplied by {@link Lucene.Net.Documents.Document#GetBoost()} of the document + containing this field. If a document has multiple fields with the same + name, all such values are multiplied together. This product is then + used to compute the norm factor for the field. By + default, in the {@link + Lucene.Net.Search.Similarity#ComputeNorm(String, + FieldInvertState)} method, the boost value is multiplied + by the {@link + Lucene.Net.Search.Similarity#LengthNorm(String, + int)} and then + rounded by {@link Lucene.Net.Search.Similarity#EncodeNorm(float)} before it is stored in the + index. One should attempt to ensure that this product does not overflow + the range of that encoding. +

+ + + + + + +
+ + Returns the boost factor for hits for this field. + +

The default value is 1.0. + +

Note: this value is not stored directly with the document in the index. + Documents returned from {@link Lucene.Net.Index.IndexReader#Document(int)} and + {@link Lucene.Net.Search.Hits#Doc(int)} may thus not have the same value present as when + this field was indexed. + +

+ + +
+ + Returns the name of the field as an interned string. + For example "date", "title", "body", ... + + + + True iff the value of the field is to be stored in the index for return + with search hits. It is an error for this to be true if a field is + Reader-valued. + + + + True iff the value of the field is to be indexed, so that it may be + searched on. + + + + True iff the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. + + + + True if the value of the field is stored and compressed within the index + + + True iff the term or terms used to index this field are stored as a term + vector, available from {@link Lucene.Net.Index.IndexReader#GetTermFreqVector(int,String)}. + These methods do not provide access to the original content of the field, + only to terms used to index it. If the original content must be + preserved, use the stored attribute instead. + + + + + + + True iff terms are stored as term vector together with their offsets + (start and end position in source text). + + + + True iff terms are stored as term vector together with their token positions. + + + True iff the value of the filed is stored as binary + + + Return the raw byte[] for the binary field. Note that + you must also call {@link #getBinaryLength} and {@link + #getBinaryOffset} to know which range of bytes in this + returned array belong to the field. + + reference to the Field value as byte[]. + + + + Returns length of byte[] segment that is used as value, if Field is not binary + returned value is undefined + + length of byte[] segment that represents this Field value + + + + Returns offset into byte[] segment that is used as value, if Field is not binary + returned value is undefined + + index of the first character in byte[] segment that represents this Field value + + + + True if norms are omitted for this indexed field + + + Renamed to {@link #getOmitTermFreqAndPositions} + + + + + + + + Expert: + + If set, omit normalization factors associated with this indexed field. + This effectively disables indexing boosts and length normalization for this field. + + + + Renamed to {@link #setOmitTermFreqAndPositions} + + + + Expert: + + If set, omit term freq, positions and payloads from + postings for this field. + +

NOTE: While this option reduces storage space + required in the index, it also means any query + requiring positional information, such as {@link + PhraseQuery} or {@link SpanQuery} subclasses will + silently fail to find results. +

+
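+ 
+ A minimal sketch (assuming a catch-all "contents" field on which positional
+ queries are not needed; illustrative only):
+ 
+            Field contents = new Field("contents", text, Field.Store.NO, Field.Index.ANALYZED);
+            // Saves index space, but PhraseQuery/SpanQuery on "contents" will silently match nothing.
+            contents.SetOmitTermFreqAndPositions(true);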
+ + Prints a Field for human consumption. + + + The value of the field in Binary, or null. If null, the Reader value, + String value, or TokenStream value is used. Exactly one of stringValue(), + readerValue(), binaryValue(), and tokenStreamValue() must be set. + + + + The value of the field as a Reader, or null. If null, the String value, + binary value, or TokenStream value is used. Exactly one of stringValue(), + readerValue(), binaryValue(), and tokenStreamValue() must be set. + + + + The value of the field as a TokenStream, or null. If null, the Reader value, + String value, or binary value is used. Exactly one of stringValue(), + readerValue(), binaryValue(), and tokenStreamValue() must be set. + + + + The value of the field as a String, or null. If null, the Reader value, + binary value, or TokenStream value is used. Exactly one of stringValue(), + readerValue(), binaryValue(), and tokenStreamValue() must be set. + + + + + + + + + + Constructs a new runtime exception with null as its + detail message. The cause is not initialized, and may subsequently be + initialized by a call to {@link #innerException}. + + + + Constructs a new runtime exception with the specified cause and a + detail message of (cause==null ? null : cause.toString()) + (which typically contains the class and detail message of + cause). +

+ This constructor is useful for runtime exceptions + that are little more than wrappers for other throwables. + +

+ the cause (which is saved for later retrieval by the + {@link #InnerException()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.) + + 1.4 + +
+ + Constructs a new runtime exception with the specified detail message. + The cause is not initialized, and may subsequently be initialized by a + call to {@link #innerException}. + + + the detail message. The detail message is saved for + later retrieval by the {@link #getMessage()} method. + + + + Constructs a new runtime exception with the specified detail message and + cause.

Note that the detail message associated with + cause is not automatically incorporated in + this runtime exception's detail message. + +

+ the detail message (which is saved for later retrieval + by the {@link #getMessage()} method). + + the cause (which is saved for later retrieval by the + {@link #InnerException()} method). (A null value is + permitted, and indicates that the cause is nonexistent or + unknown.) + + 1.4 + +
+ + Provides information about what should be done with this Field + + + + + + Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered. + {@link Document#GetField(String)} and {@link Document#GetFieldable(String)} should not return null. +

+ {@link Document#Add(Fieldable)} should be called by the Reader. +

+
+ + Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until + invoked. {@link Document#GetField(String)} SHOULD NOT BE USED. {@link Document#GetFieldable(String)} is safe to use and should + return a valid instance of a {@link Fieldable}. +

+ {@link Document#Add(Fieldable)} should be called by the Reader. +

+
+ + Do not load the {@link Field}. {@link Document#GetField(String)} and {@link Document#GetFieldable(String)} should return null. + {@link Document#Add(Fieldable)} is not called. +

+ {@link Document#Add(Fieldable)} should not be called by the Reader. +

+
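+ 
+ A hedged C# illustration (assuming the 2.9-era FieldSelector interface; the class and
+ field names are invented for the example) of how these results are typically produced:
+ 
+            class TitleOnlySelector : FieldSelector
+            {
+                public FieldSelectorResult Accept(System.String fieldName)
+                {
+                    // LOAD the title, skip every other field
+                    return "title".Equals(fieldName) ? FieldSelectorResult.LOAD
+                                                     : FieldSelectorResult.NO_LOAD;
+                }
+            }
+ 
+            Document doc = reader.Document(docNum, new TitleOnlySelector());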
+ + Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the + Document may not have its complete set of Fields. {@link Document#GetField(String)} and {@link Document#GetFieldable(String)} should + both be valid for this {@link Field} +

+ {@link Document#Add(Fieldable)} should be called by the Reader. +

+
+ + Behaves much like {@link #LOAD} but does not uncompress any compressed data. This is used for internal purposes. + {@link Document#GetField(String)} and {@link Document#GetFieldable(String)} should not return null. +

+ {@link Document#Add(Fieldable)} should be called by + the Reader. +

+ This is an internal option only, and is + no longer needed now that {@link CompressionTools} + is used for field compression. + +
+ + Expert: Load the size of this {@link Field} rather than its value. + Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value. + The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0] + + + + Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded + + + A field is a section of a Document. Each field has two parts, a name and a + value. Values may be free text, provided as a String or as a Reader, or they + may be atomic keywords, which are not further processed. Such keywords may + be used to represent dates, urls, etc. Fields are optionally stored in the + index, so that they may be returned with hits on the document. + + + + The value of the field as a String, or null. If null, the Reader value or + binary value is used. Exactly one of stringValue(), + readerValue(), and getBinaryValue() must be set. + + + + The value of the field as a Reader, or null. If null, the String value or + binary value is used. Exactly one of stringValue(), + readerValue(), and getBinaryValue() must be set. + + + + The value of the field in Binary, or null. If null, the Reader value, + or String value is used. Exactly one of stringValue(), + readerValue(), and getBinaryValue() must be set. + + This method must allocate a new byte[] if + the {@link AbstractField#GetBinaryOffset()} is non-zero + or {@link AbstractField#GetBinaryLength()} is not the + full length of the byte[]. Please use {@link + AbstractField#GetBinaryValue()} instead, which simply + returns the byte[]. + + + + The TokesStream for this field to be used when indexing, or null. If null, the Reader value + or String value is analyzed to produce the indexed tokens. + + + +

Expert: change the value of this field. This can + be used during indexing to re-use a single Field + instance to improve indexing speed by avoiding GC cost + of new'ing and reclaiming Field instances. Typically + a single {@link Document} instance is re-used as + well. This helps most on small documents.

+ +

Each Field instance should only be used once + within a single {@link Document} instance. See ImproveIndexingSpeed + for details.

+

+
+ + Expert: change the value of this field. See setValue(String). + + + Expert: change the value of this field. See setValue(String). + + + Expert: change the value of this field. See setValue(String). + + + Expert: change the value of this field. See setValue(String). + use {@link #setTokenStream} + + + + Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. + May be combined with stored values from stringValue() or binaryValue() + + + + Create a field by specifying its name, value and how it will + be saved in the index. Term vectors will not be stored in the index. + + + The name of the field + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + NullPointerException if name or value is null + IllegalArgumentException if the field is neither stored nor indexed + + + Create a field by specifying its name, value and how it will + be saved in the index. + + + The name of the field + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + Whether term vector should be stored + + NullPointerException if name or value is null + IllegalArgumentException in any of the following situations: +
    +
  • the field is neither stored nor indexed
  • the field is not indexed but termVector is TermVector.YES
+
+
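+ 
+ A brief C# sketch (field names and values are assumptions made for illustration)
+ covering common Store/Index/TermVector combinations described above:
+ 
+            Field title = new Field("title", "Apache Lucene.Net", Field.Store.YES, Field.Index.ANALYZED);
+            Field id = new Field("id", "DOC-42", Field.Store.YES, Field.Index.NOT_ANALYZED);
+            Field body = new Field("body", bodyText, Field.Store.NO, Field.Index.ANALYZED,
+                                   Field.TermVector.WITH_POSITIONS_OFFSETS);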
+ + Create a field by specifying its name, value and how it will + be saved in the index. + + + The name of the field + + Whether to .intern() name or not + + The string to process + + Whether value should be stored in the index + + Whether the field should be indexed, and if so, if it should + be tokenized before indexing + + Whether term vector should be stored + + NullPointerException if name or value is null + IllegalArgumentException in any of the following situations: +
    +
  • the field is neither stored nor indexed
  • the field is not indexed but termVector is TermVector.YES
+
+
+ + Create a tokenized and indexed field that is not stored. Term vectors will + not be stored. The Reader is read only when the Document is added to the index, + i.e. you may not close the Reader until {@link IndexWriter#AddDocument(Document)} + has been called. + + + The name of the field + + The reader with the content + + NullPointerException if name or reader is null + + + Create a tokenized and indexed field that is not stored, optionally with + storing term vectors. The Reader is read only when the Document is added to the index, + i.e. you may not close the Reader until {@link IndexWriter#AddDocument(Document)} + has been called. + + + The name of the field + + The reader with the content + + Whether term vector should be stored + + NullPointerException if name or reader is null + + + Create a tokenized and indexed field that is not stored. Term vectors will + not be stored. This is useful for pre-analyzed fields. + The TokenStream is read only when the Document is added to the index, + i.e. you may not close the TokenStream until {@link IndexWriter#AddDocument(Document)} + has been called. + + + The name of the field + + The TokenStream with the content + + NullPointerException if name or tokenStream is null + + + Create a tokenized and indexed field that is not stored, optionally with + storing term vectors. This is useful for pre-analyzed fields. + The TokenStream is read only when the Document is added to the index, + i.e. you may not close the TokenStream until {@link IndexWriter#AddDocument(Document)} + has been called. + + + The name of the field + + The TokenStream with the content + + Whether term vector should be stored + + NullPointerException if name or tokenStream is null + + + Create a stored field with binary value. Optionally the value may be compressed. + + + The name of the field + + The binary value + + How value should be stored (compressed or not) + + IllegalArgumentException if store is Store.NO + + + Create a stored field with binary value. Optionally the value may be compressed. + + + The name of the field + + The binary value + + Starting offset in value where this Field's bytes are + + Number of bytes to use for this Field, starting at offset + + How value should be stored (compressed or not) + + IllegalArgumentException if store is Store.NO + + + Specifies whether and how a field should be stored. + + + A serializable Enum class. + + + Resolves the deserialized instance to the local reference for accurate + equals() and == comparisons. + + + a reference to Parameter as resolved in the local VM + + ObjectStreamException + + + Store the original field value in the index in a compressed form. This is + useful for long documents and for binary valued fields. + + Please use {@link CompressionTools} instead. + For string fields that were previously indexed and stored using compression, + the new way to achieve this is: First add the field indexed-only (no store) + and additionally using the same field name as a binary, stored field + with {@link CompressionTools#compressString}. + + + + Store the original field value in the index. This is useful for short texts + like a document's title which should be displayed with the results. The + value is stored in its original form, i.e. no analyzer is used before it is + stored. + + + + Do not store the field value in the index. + + + Specifies whether and how a field should be indexed. + + + Do not index the field value. 
This field can thus not be searched, + but one can still access its contents provided it is + {@link Field.Store stored}. + + + + Index the tokens produced by running the field's + value through an Analyzer. This is useful for + common text. + + + + this has been renamed to {@link #ANALYZED} + + + + Index the field's value without using an Analyzer, so it can be searched. + As no analyzer is used the value will be stored as a single term. This is + useful for unique Ids like product numbers. + + + + This has been renamed to {@link #NOT_ANALYZED} + + + + Expert: Index the field's value without an Analyzer, + and also disable the storing of norms. Note that you + can also separately enable/disable norms by calling + {@link Field#setOmitNorms}. No norms means that + index-time field and document boosting and field + length normalization are disabled. The benefit is + less memory usage as norms take up one byte of RAM + per indexed field for every document in the index, + during searching. Note that once you index a given + field with norms enabled, disabling norms will + have no effect. In other words, for this to have the + above described effect on a field, all instances of + that field must be indexed with NOT_ANALYZED_NO_NORMS + from the beginning. + + + + This has been renamed to + {@link #NOT_ANALYZED_NO_NORMS} + + + + Expert: Index the tokens produced by running the + field's value through an Analyzer, and also + separately disable the storing of norms. See + {@link #NOT_ANALYZED_NO_NORMS} for what norms are + and why you may want to disable them. + + + + Specifies whether and how a field should have term vectors. + + + Do not store term vectors. + + + Store the term vectors of each document. A term vector is a list + of the document's terms and their number of occurrences in that document. + + + + Store the term vector + token position information + + + + + + + Store the term vector + Token offset information + + + + + + + Store the term vector + Token position and offset information + + + + + + + + + + + Documents are the unit of indexing and search. + + A Document is a set of fields. Each field has a name and a textual value. + A field may be {@link Fieldable#IsStored() stored} with the document, in which + case it is returned with search hits on the document. Thus each document + should typically contain one or more stored fields which uniquely identify + it. + +

Note that fields which are not {@link Fieldable#IsStored() stored} are + not available in documents retrieved from the index, e.g. with {@link + ScoreDoc#doc}, {@link Searcher#Doc(int)} or {@link + IndexReader#Document(int)}. +

+
+ + Constructs a new document with no fields. + + + Sets a boost factor for hits on any field of this document. This value + will be multiplied into the score of all hits on this document. + +

The default value is 1.0. + +

Values are multiplied into the value of {@link Fieldable#GetBoost()} of + each field in this document. Thus, this method in effect sets a default + boost for the fields of this document. + +

+ + +
+ + Returns, at indexing time, the boost factor as set by {@link #SetBoost(float)}. + +

Note that once a document is indexed this value is no longer available + from the index. At search time, for retrieved documents, this method always + returns 1. This however does not mean that the boost value set at indexing + time was ignored - it was just combined with other indexing time factors and + stored elsewhere, for better indexing and search performance. (For more + information see the "norm(t,d)" part of the scoring formula in + {@link Lucene.Net.Search.Similarity Similarity}.) + +

+ + +
+ +

Adds a field to a document. Several fields may be added with + the same name. In this case, if the fields are indexed, their text is + treated as though appended for the purposes of search.

+

Note that add, like the removeField(s) methods, only makes sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new, changed version of that + document has to be added.

+

+
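+ 
+ A short illustrative sketch (the writer variable and the field values are
+ assumptions, not part of the original docs):
+ 
+            Document doc = new Document();
+            doc.Add(new Field("id", "DOC-42", Field.Store.YES, Field.Index.NOT_ANALYZED));
+            doc.Add(new Field("body", "first part", Field.Store.NO, Field.Index.ANALYZED));
+            // Same field name again: for search, the text behaves as if appended.
+            doc.Add(new Field("body", "second part", Field.Store.NO, Field.Index.ANALYZED));
+            writer.AddDocument(doc);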
+ +

Removes field with the specified name from the document. + If multiple fields exist with this name, this method removes the first field that has been added. + If there is no field with the specified name, the document remains unchanged.

+

Note that the removeField(s) methods, like the add method, only make sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new, changed version of that + document has to be added.

+

+
+ +

Removes all fields with the given name from the document. + If there is no field with the specified name, the document remains unchanged.

+

Note that the removeField(s) methods, like the add method, only make sense + prior to adding a document to an index. These methods cannot + be used to change the content of an existing index! In order to achieve this, + a document has to be deleted from an index and a new, changed version of that + document has to be added.

+

+
+ + Returns a field with the given name if any exist in this document, or + null. If multiple fields exists with this name, this method returns the + first value added. + Do not use this method with lazy loaded fields. + + + + Returns a field with the given name if any exist in this document, or + null. If multiple fields exists with this name, this method returns the + first value added. + + + + Returns the string value of the field with the given name if any exist in + this document, or null. If multiple fields exist with this name, this + method returns the first value added. If only binary fields with this name + exist, returns null. + + + + Returns an Enumeration of all the fields in a document. + use {@link #GetFields()} instead + + + + Returns a List of all the fields in a document. +

Note that fields which are not {@link Fieldable#IsStored() stored} are + not available in documents retrieved from the + index, e.g. {@link Searcher#Doc(int)} or {@link + IndexReader#Document(int)}. +

+
+ + Returns an array of {@link Field}s with the given name. + Do not use with lazy loaded fields. + This method returns an empty array when there are no + matching fields. It never returns null. + + + the name of the field + + a Field[] array + + + + Returns an array of {@link Fieldable}s with the given name. + This method returns an empty array when there are no + matching fields. It never returns null. + + + the name of the field + + a Fieldable[] array + + + + Returns an array of values of the field specified as the method parameter. + This method returns an empty array when there are no + matching fields. It never returns null. + + the name of the field + + a String[] of field values + + + + Returns an array of byte arrays for of the fields that have the name specified + as the method parameter. This method returns an empty + array when there are no matching fields. It never + returns null. + + + the name of the field + + a byte[][] of binary field values + + + + Returns an array of bytes for the first (or only) field that has the name + specified as the method parameter. This method will return null + if no binary fields with the specified name are available. + There may be non-binary fields with the same name. + + + the name of the field. + + a byte[] containing the binary field value or null + + + + Prints the fields of a document for human consumption. + + + The start and end character offset of a Token. + + + Base class for Attributes that can be added to a + {@link Lucene.Net.Util.AttributeSource}. +

+ Attributes are used to add data in a dynamic, yet type-safe way to a source + of usually streamed objects, e.g. a {@link Lucene.Net.Analysis.TokenStream}. +

+
+ + Base interface for attributes. + + + Clears the values in this AttributeImpl and resets it to its + default value. If this implementation implements more than one Attribute interface + it clears all. + + + + The default implementation of this method accesses all declared + fields of this object and prints the values in the following syntax: + +
+            public String toString() {
+            return "start=" + startOffset + ",end=" + endOffset;
+            }
+            
+ + This method may be overridden by subclasses. +
+
+ + Subclasses must implement this method and should compute + a hashCode similar to this: +
+            public int hashCode() {
+            int code = startOffset;
+            code = code * 31 + endOffset;
+            return code;
+            }
+            
+ + see also {@link #equals(Object)} +
+
+ + All values used for computation of {@link #hashCode()} + should be checked here for equality. + + see also {@link Object#equals(Object)} + + + + Copies the values from this Attribute into the passed-in + target attribute. The target implementation must support all the + Attributes this implementation supports. + + + + Shallow clone. Subclasses must override this if they + need to clone any members deeply, + + + + The start and end character offset of a Token. + + + Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. + + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. + + + + Set the starting and ending offset. + See StartOffset() and EndOffset() + + + + Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + Works in conjunction with the SinkTokenizer to provide the ability to set aside tokens + that have already been analyzed. This is useful in situations where multiple fields share + many common analysis steps and then go their separate ways. +

+ It is also useful for doing things like entity extraction or proper noun analysis as + part of the analysis workflow and saving off those tokens for use in another field. + +

+            SinkTokenizer sink1 = new SinkTokenizer();
+            SinkTokenizer sink2 = new SinkTokenizer();
+            TokenStream source1 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader1), sink1), sink2);
+            TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader2), sink1), sink2);
+            TokenStream final1 = new LowerCaseFilter(source1);
+            TokenStream final2 = source2;
+            TokenStream final3 = new EntityDetect(sink1);
+            TokenStream final4 = new URLDetect(sink2);
+            d.add(new Field("f1", final1));
+            d.add(new Field("f2", final2));
+            d.add(new Field("f3", final3));
+            d.add(new Field("f4", final4));
+            
+ In this example, sink1 and sink2 will both get tokens from both + reader1 and reader2 after the whitespace tokenizer, + and we can then further wrap any of these in extra analysis; more "sources" can be inserted if desired. + It is important that tees are consumed before sinks (in the above example, the field names must be + less than the sink's field names). + Note that the EntityDetect and URLDetect TokenStreams are for the example only and do not currently exist in Lucene. +

+ + See LUCENE-1058. +

+ WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API. + If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers + the same functionality. +

+ + + Use {@link TeeSinkTokenFilter} instead + + +
+ + A TokenFilter is a TokenStream whose input is another TokenStream. +

+ This is an abstract class; subclasses must override {@link #IncrementToken()}. + +

+ + +
+ + A TokenStream enumerates the sequence of tokens, either from + {@link Field}s of a {@link Document} or from query text. +

+ This is an abstract class. Concrete subclasses are: +

  • {@link Tokenizer}, a TokenStream whose input is a Reader; and
  • {@link TokenFilter}, a TokenStream whose input is another TokenStream.
+ A new TokenStream API has been introduced with Lucene 2.9. This API + has moved from being {@link Token} based to {@link Attribute} based. While + {@link Token} still exists in 2.9 as a convenience class, the preferred way + to store the information of a {@link Token} is to use {@link AttributeImpl}s. +

+ TokenStream now extends {@link AttributeSource}, which provides + access to all of the token {@link Attribute}s for the TokenStream. + Note that only one instance per {@link AttributeImpl} is created and reused + for every token. This approach reduces object creation and allows local + caching of references to the {@link AttributeImpl}s. See + {@link #IncrementToken()} for further details. +

+ The workflow of the new TokenStream API is as follows: +

  1. Instantiation of TokenStream/{@link TokenFilter}s which add/get attributes to/from the {@link AttributeSource}.
  2. The consumer calls {@link TokenStream#Reset()}.
  3. The consumer retrieves attributes from the stream and stores local references to all attributes it wants to access.
  4. The consumer calls {@link #IncrementToken()} until it returns false and consumes the attributes after each call.
  5. The consumer calls {@link #End()} so that any end-of-stream operations can be performed.
  6. The consumer calls {@link #Close()} to release any resources when finished using the TokenStream.
+ To make sure that filters and consumers know which attributes are available, + the attributes must be added during instantiation. Filters and consumers are + not required to check for availability of attributes in + {@link #IncrementToken()}. +

+ You can find some example code for the new API in the analysis package level + Javadoc. +
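+ As an illustrative sketch of that workflow (Java-style, like the other examples in these docs; the analyzer, field name, and reader text are placeholders), a consumer might look like this:
+            TokenStream stream = analyzer.tokenStream("content", new StringReader(text));
+            TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
+            OffsetAttribute offsetAtt = (OffsetAttribute) stream.addAttribute(OffsetAttribute.class);
+            stream.reset();
+            while (stream.incrementToken()) {
+              // the same attribute instances are reused for every token
+              System.out.println(termAtt.term() + " " + offsetAtt.startOffset() + "-" + offsetAtt.endOffset());
+            }
+            stream.end();
+            stream.close();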

+ Sometimes it is desirable to capture the current state of a TokenStream, e.g. for buffering purposes (see {@link CachingTokenFilter}, {@link TeeSinkTokenFilter}). For this use case {@link AttributeSource#CaptureState} and {@link AttributeSource#RestoreState} can be used. +
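+ A minimal sketch of that pattern, assuming a filter that wants to replay its input's current attribute values later (the input field is the wrapped TokenStream):
+            AttributeSource.State state = input.captureState();
+            // ... other work; the attributes of input may change in the meantime ...
+            input.restoreState(state);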

+
+ + An AttributeSource contains a list of different {@link AttributeImpl}s, + and methods to add and get them. There can only be a single instance + of an attribute in the same AttributeSource instance. This is ensured + by passing in the actual type of the Attribute (Class<Attribute>) to + the {@link #AddAttribute(Class)}, which then checks if an instance of + that type is already present. If yes, it returns the instance, otherwise + it creates a new instance and returns it. + + + + An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}. + + + An AttributeSource that uses the same attributes as the supplied one. + + + An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} instances. + + + returns the used AttributeFactory. + + + Returns a new iterator that iterates the attribute classes + in the same order they were added in. + Signature for Java 1.5: public Iterator<Class<? extends Attribute>> getAttributeClassesIterator() + + Note that this return value is different from Java in that it enumerates over the values + and not the keys + + + + Returns a new iterator that iterates all unique Attribute implementations. + This iterator may contain less entries that {@link #getAttributeClassesIterator}, + if one instance implements more than one Attribute interface. + Signature for Java 1.5: public Iterator<AttributeImpl> getAttributeImplsIterator() + + + + a cache that stores all interfaces for known implementation classes for performance (slow reflection) + + + Adds a custom AttributeImpl instance with one or more Attribute interfaces. + + + The caller must pass in a Class<? extends Attribute> value. + This method first checks if an instance of that class is + already in this AttributeSource and returns it. Otherwise a + new instance is created, added to this AttributeSource and returned. + Signature for Java 1.5: public <T extends Attribute> T addAttribute(Class<T>) + + + + Returns true, iff this AttributeSource has any attributes + + + The caller must pass in a Class<? extends Attribute> value. + Returns true, iff this AttributeSource contains the passed-in Attribute. + Signature for Java 1.5: public boolean hasAttribute(Class<? extends Attribute>) + + + + The caller must pass in a Class<? extends Attribute> value. + Returns the instance of the passed in Attribute contained in this AttributeSource + Signature for Java 1.5: public <T extends Attribute> T getAttribute(Class<T>) + + + IllegalArgumentException if this AttributeSource does not contain the + Attribute. It is recommended to always use {@link #addAttribute} even in consumers + of TokenStreams, because you cannot know if a specific TokenStream really uses + a specific Attribute. {@link #addAttribute} will automatically make the attribute + available. If you want to only use the attribute, if it is available (to optimize + consuming), use {@link #hasAttribute}. + + + + Resets all Attributes in this AttributeSource by calling + {@link AttributeImpl#Clear()} on each Attribute implementation. + + + + Captures the state of all Attributes. The return value can be passed to + {@link #restoreState} to restore the state of this or another AttributeSource. + + + + Restores this state by copying the values of all attribute implementations + that this state contains into the attributes implementations of the targetStream. + The targetStream must contain a corresponding instance for each argument + contained in this state (e.g. 
it is not possible to restore the state of + an AttributeSource containing a TermAttribute into a AttributeSource using + a Token instance as implementation). + + Note that this method does not affect attributes of the targetStream + that are not contained in this state. In other words, if for example + the targetStream contains an OffsetAttribute, but this state doesn't, then + the value of the OffsetAttribute remains unchanged. It might be desirable to + reset its value to the default, in which case the caller should first + call {@link TokenStream#ClearAttributes()} on the targetStream. + + + + Performs a clone of all {@link AttributeImpl} instances returned in a new + AttributeSource instance. This method can be used to e.g. create another TokenStream + with exactly the same attributes (using {@link #AttributeSource(AttributeSource)}) + + + + An AttributeFactory creates instances of {@link AttributeImpl}s. + + + returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class. +

Signature for Java 1.5: public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) +

+
+ + This is the default factory that creates {@link AttributeImpl}s using the + class name of the supplied {@link Attribute} interface class by appending Impl to it. + + + + This class holds the state of an AttributeSource. + + + + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + A TokenStream using the default attribute factory. + + + A TokenStream that uses the same attributes as the supplied one. + + + A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances. + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + For extra performance you can globally enable the new + {@link #IncrementToken} API using {@link Attribute}s. There will be a + small, but in most cases negligible performance increase by enabling this, + but it only works if all TokenStreams use the new API and + implement {@link #IncrementToken}. This setting can only be enabled + globally. +

+ This setting only affects TokenStreams instantiated after this + call. All TokenStreams already created use the other setting. +

+ All core {@link Analyzer}s are compatible with this setting; if your own TokenStreams are also compatible, you should enable it. +

+ When enabled, tokenization may throw {@link UnsupportedOperationException}s if the whole tokenizer chain is not compatible, e.g. if one of the TokenStreams does not implement the new TokenStream API. +

+ The default is false, so the fallback to the old API remains available. + +

+ This setting will no longer be needed in Lucene 3.0 as the old + API will be removed. + +
+ + Returns if only the new API is used. + + + + + This setting will no longer be needed in Lucene 3.0 as + the old API will be removed. + + + + Consumers (i.e., {@link IndexWriter}) use this method to advance the stream to + the next token. Implementing classes must implement this method and update + the appropriate {@link AttributeImpl}s with the attributes of the next + token. + + The producer must make no assumptions about the attributes after the + method has been returned: the caller may arbitrarily change it. If the + producer needs to preserve the state for subsequent calls, it can use + {@link #captureState} to create a copy of the current attribute state. + + This method is called for every token of a document, so an efficient + implementation is crucial for good performance. To avoid calls to + {@link #AddAttribute(Class)} and {@link #GetAttribute(Class)} or downcasts, + references to all {@link AttributeImpl}s that this stream uses should be + retrieved during instantiation. + + To ensure that filters and consumers know which attributes are available, + the attributes must be added during instantiation. Filters and consumers + are not required to check for availability of attributes in + {@link #IncrementToken()}. + + + false for end of stream; true otherwise + + Note that this method will be defined abstract in Lucene + 3.0. + + + + This method is called by the consumer after the last token has been + consumed, after {@link #IncrementToken()} returned false + (using the new TokenStream API). Streams implementing the old API + should upgrade to use this feature. +

+ This method can be used to perform any end-of-stream operations, such as setting the final offset of a stream. The final offset of a stream might differ from the offset of the last token, e.g. when one or more whitespace characters follow the last token and a {@link WhitespaceTokenizer} was used. + +

+ IOException +
+ + Returns the next token in the stream, or null at EOS. When possible, the + input Token should be used as the returned Token (this gives fastest + tokenization performance), but this is not required and a new Token may be + returned. Callers may re-use a single Token instance for successive calls + to this method. + + This implicitly defines a "contract" between consumers (callers of this + method) and producers (implementations of this method that are the source + for tokens): +
  • A consumer must fully consume the previously returned {@link Token} before calling this method again.
  • A producer must call {@link Token#Clear()} before setting the fields in it and returning it.
+ Also, the producer must make no assumptions about a {@link Token} after it + has been returned: the caller may arbitrarily change it. If the producer + needs to hold onto the {@link Token} for subsequent calls, it must clone() + it before storing it. Note that a {@link TokenFilter} is considered a + consumer. + +
+ a {@link Token} that may or may not be used to return; + this parameter should never be null (the callee is not required to + check for null before using it, but it is a good idea to assert that + it is not null.) + + next {@link Token} in the stream or null if end-of-stream was hit + + The new {@link #IncrementToken()} and {@link AttributeSource} + APIs should be used instead. + +
+ + Returns the next {@link Token} in the stream, or null at EOS. + + + The returned Token is a "full private copy" (not re-used across + calls to {@link #Next()}) but will be slower than calling + {@link #Next(Token)} or using the new {@link #IncrementToken()} + method with the new {@link AttributeSource} API. + + + + Resets this stream to the beginning. This is an optional operation, so + subclasses may or may not implement this method. {@link #Reset()} is not needed for + the standard indexing process. However, if the tokens of a + TokenStream are intended to be consumed more than once, it is + necessary to implement {@link #Reset()}. Note that if your TokenStream + caches tokens and feeds them back again after a reset, it is imperative + that you clone the tokens when you store them away (on the first pass) as + well as when you return them (on future passes after {@link #Reset()}). + + + + Releases resources associated with this stream. + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + The source of tokens for this filter. + + + Construct a token stream filtering the given input. + + + Performs end-of-stream operations, if any, and calls then end() on the + input TokenStream.

+ NOTE: Be sure to call super.end() first when overriding this method. +

+
+ + Close the input TokenStream. + + + Reset the filter as well as the input TokenStream. + + + This TokenFilter provides the ability to set aside attribute states + that have already been analyzed. This is useful in situations where multiple fields share + many common analysis steps and then go their separate ways. +

+ It is also useful for doing things like entity extraction or proper noun analysis as + part of the analysis workflow and saving off those tokens for use in another field. + +

+            TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
+            TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+            TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+            TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
+            source2.addSinkTokenStream(sink1);
+            source2.addSinkTokenStream(sink2);
+            TokenStream final1 = new LowerCaseFilter(source1);
+            TokenStream final2 = source2;
+            TokenStream final3 = new EntityDetect(sink1);
+            TokenStream final4 = new URLDetect(sink2);
+            d.add(new Field("f1", final1));
+            d.add(new Field("f2", final2));
+            d.add(new Field("f3", final3));
+            d.add(new Field("f4", final4));
+            
+ In this example, sink1 and sink2 will both get tokens from both reader1 and reader2 after the whitespace tokenizer, and we can further wrap any of these in extra analysis; more "sources" can be inserted if desired. It is important that the tees are consumed before the sinks (in the above example, the tee field names must compare less than the sink field names). If you are not sure which stream is consumed first, you can simply add another sink and then pass all tokens to the sinks at once using {@link #consumeAllTokens}; this TokenFilter is exhausted after that. To do so, change the example above to:
+            ...
+            TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
+            TokenStream final2 = source2.newSinkTokenStream();
+            sink1.consumeAllTokens();
+            sink2.consumeAllTokens();
+            ...
+            
+ In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready. +

Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. +

+
+ + Instantiates a new TeeSinkTokenFilter. + + + Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream. + + + Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream + that pass the supplied filter. + + + + + + Adds a {@link SinkTokenStream} created by another TeeSinkTokenFilter + to this one. The supplied stream will also receive all consumed tokens. + This method can be used to pass tokens from two different tees to one sink. + + + + TeeSinkTokenFilter passes all tokens to the added sinks + when itself is consumed. To be sure, that all tokens from the input + stream are passed to the sinks, you can call this methods. + This instance is exhausted after this, but all sinks are instant available. + + + + A filter that decides which {@link AttributeSource} states to store in the sink. + + + Returns true, iff the current state of the passed-in {@link AttributeSource} shall be stored + in the sink. + + + + Called by {@link SinkTokenStream#Reset()}. This method does nothing by default + and can optionally be overridden. + + + + A SinkTokenizer can be used to cache Tokens for use in an Analyzer +

+ WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API. + If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers + the same functionality. +

+ + + Use {@link TeeSinkTokenFilter} instead + + + +
+ + A Tokenizer is a TokenStream whose input is a Reader. +

+ This is an abstract class; subclasses must override {@link #IncrementToken()} +

+ NOTE: Subclasses overriding {@link #IncrementToken()} must call {@link AttributeSource#ClearAttributes()} before setting attributes. Subclasses overriding {@link #next(Token)} must call {@link Token#Clear()} before setting Token attributes. +

+
+ + The text source for this Tokenizer. + + + Construct a tokenizer with null input. + + + Construct a token stream processing the given input. + + + Construct a tokenizer with null input using the given AttributeFactory. + + + Construct a token stream processing the given input using the given AttributeFactory. + + + Construct a token stream processing the given input using the given AttributeSource. + + + Construct a token stream processing the given input using the given AttributeSource. + + + By default, closes the input Reader. + + + Return the corrected offset. If {@link #input} is a {@link CharStream} subclass + this method calls {@link CharStream#CorrectOffset}, else returns currentOff. + + offset as seen in the output + + corrected offset based on the input + + + + + + Expert: Reset the tokenizer to a new reader. Typically, an + analyzer (in its reusableTokenStream method) will use + this to re-use a previously created tokenizer. + + + + Get the tokens in the internal List. +

+ WARNING: Adding tokens to this list requires the {@link #Reset()} method to be called in order for them + to be made available. Also, this Tokenizer does nothing to protect against {@link java.util.ConcurrentModificationException}s + in the case of adds happening while {@link #Next(Lucene.Net.Analysis.Token)} is being called. +

+ WARNING: Since this SinkTokenizer can be reset and the cached tokens made available again, do not modify them. Modify clones instead. + +

+ A List of {@link Lucene.Net.Analysis.Token}s + +
+ + Returns the next token out of the list of cached tokens + The next {@link Lucene.Net.Analysis.Token} in the Sink. + + IOException + + + Override this method to cache only certain tokens, or new tokens based + on the old tokens. + + + The {@link Lucene.Net.Analysis.Token} to add to the sink + + + + Reset the internal data structures to the start at the front of the list of tokens. Should be called + if tokens were added to the list after an invocation of {@link #Next(Token)} + + IOException + + + Emits the entire input as a single token. + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Class to encode java's UTF16 char[] into UTF8 byte[] + without always allocating a new byte[] as + String.getBytes("UTF-8") does. + +

WARNING: This API is new and experimental and may suddenly change.

+

+
+ + Encode characters from a char[] source, starting at + offset and stopping when the character 0xffff is seen. + Returns the number of bytes written to bytesOut. + + + + Encode characters from a char[] source, starting at + offset for length chars. Returns the number of bytes + written to bytesOut. + + + + Encode characters from this String, starting at offset + for length characters. Returns the number of bytes + written to bytesOut. + + + + Convert UTF8 bytes into UTF16 characters. If offset + is non-zero, conversion starts at that starting point + in utf8, re-using the results from the previous call + up until offset. + + + + Helper methods to ease implementing {@link Object#toString()}. + + + for printing boost only if not 1.0 + + + Base class for cache implementations. + + + Returns a thread-safe cache backed by the specified cache. + In order to guarantee thread-safety, all access to the backed cache must + be accomplished through the returned cache. + + + + Called by {@link #SynchronizedCache(Cache)}. This method + returns a {@link SynchronizedCache} instance that wraps + this instance by default and can be overridden to return + e. g. subclasses of {@link SynchronizedCache} or this + in case this cache is already synchronized. + + + + Puts a (key, value)-pair into the cache. + + + Returns the value for the given key. + + + Returns whether the given key is in this cache. + + + Closes the cache. + + + Simple Cache wrapper that synchronizes all + calls that access the cache. + + + + A base class for all collectors that return a {@link TopDocs} output. This + collector allows easy extension by providing a single constructor which + accepts a {@link PriorityQueue} as well as protected members for that + priority queue and a counter of the number of total hits.
+ Extending classes can override {@link #TopDocs(int, int)} and + {@link #GetTotalHits()} in order to provide their own implementation. +
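+ For instance, retrieving the top hits with the concrete {@link TopScoreDocCollector} subclass might look like this (a hedged sketch; searcher and query are placeholders):
+            TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
+            searcher.search(query, collector);
+            TopDocs topDocs = collector.topDocs();
+            int totalHits = collector.getTotalHits();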
+
+ +

Expert: Collectors are primarily meant to be used to + gather raw results from a search, and implement sorting + or custom result filtering, collation, etc.

+ +

As of 2.9, this class replaces the deprecated HitCollector, and offers an API for efficient collection of hits across sequential {@link IndexReader}s. {@link IndexSearcher} advances the collector through each of the sub-readers, in an arbitrary order. This results in a higher-performance means of collection.

+ +

Lucene's core collectors are derived from Collector. + Likely your application can use one of these classes, or + subclass {@link TopDocsCollector}, instead of + implementing Collector directly: + +

  • {@link TopDocsCollector} is an abstract base class that assumes you will retrieve the top N docs, according to some criteria, after collection is done.
  • {@link TopScoreDocCollector} is a concrete subclass of {@link TopDocsCollector} and sorts according to score + docID. This is used internally by the {@link IndexSearcher} search methods that do not take an explicit {@link Sort}. It is likely the most frequently used collector.
  • {@link TopFieldCollector} subclasses {@link TopDocsCollector} and sorts according to a specified {@link Sort} object (sort by field). This is used internally by the {@link IndexSearcher} search methods that take an explicit {@link Sort}.
  • {@link TimeLimitingCollector}, which wraps any other Collector and aborts the search if it's taken too much time, will subclass Collector in 3.0 (presently it subclasses the deprecated HitCollector).
  • {@link PositiveScoresOnlyCollector} wraps any other Collector and prevents collection of hits whose score is <= 0.0.
+ +

Collector decouples the score from the collected doc: + the score computation is skipped entirely if it's not + needed. Collectors that do need the score should + implement the {@link #setScorer} method, to hold onto the + passed {@link Scorer} instance, and call {@link + Scorer#Score()} within the collect method to compute the + current hit's score. If your collector may request the + score for a single hit multiple times, you should use + {@link ScoreCachingWrappingScorer}.

+ +

NOTE: The doc that is passed to the collect + method is relative to the current reader. If your + collector needs to resolve this to the docID space of the + Multi*Reader, you must re-base it by recording the + docBase from the most recent setNextReader call. Here's + a simple example showing how to collect docIDs into a + BitSet:

+ +

+            Searcher searcher = new IndexSearcher(indexReader);
+            final BitSet bits = new BitSet(indexReader.maxDoc());
+            searcher.search(query, new Collector() {
+              private int docBase;
+            
+              // ignore scorer
+              public void setScorer(Scorer scorer) {
+              }
+            
+              // accept docs out of order (for a BitSet it doesn't matter)
+              public boolean acceptsDocsOutOfOrder() {
+                return true;
+              }
+            
+              public void collect(int doc) {
+                bits.set(doc + docBase);
+              }
+            
+              public void setNextReader(IndexReader reader, int docBase) {
+                this.docBase = docBase;
+              }
+            });
+            
+ +

Not all collectors will need to rebase the docID. For + example, a collector that simply counts the total number + of hits would skip it.

+ +

NOTE: Prior to 2.9, Lucene silently filtered + out hits with score <= 0. As of 2.9, the core Collectors + no longer do that. It's very unusual to have such hits + (a negative query boost, or function query returning + negative custom scores, could cause it to happen). If + you need that behavior, use {@link + PositiveScoresOnlyCollector}.

+ +

NOTE: This API is experimental and might change + in incompatible ways in the next release.

+ +

+ 2.9 + +
+ + Called before successive calls to {@link #Collect(int)}. Implementations + that need the score of the current document (passed-in to + {@link #Collect(int)}), should save the passed-in Scorer and call + scorer.score() when needed. + + + + Called once for every document matching a query, with the unbased document + number. + +

+ Note: This is called in an inner search loop. For good search performance, + implementations of this method should not call {@link Searcher#Doc(int)} or + {@link Lucene.Net.Index.IndexReader#Document(int)} on every hit. + Doing so can slow searches by an order of magnitude or more. +

+
+ + Called before collecting from each IndexReader. All doc ids in {@link #Collect(int)} will correspond to reader. + Add docBase to the current IndexReader's internal document id to re-base ids in {@link #Collect(int)}. + + next IndexReader + + + + + + + Return true if this collector does not require the matching docIDs to be delivered in int sort order (smallest to largest) to {@link #collect}. +

Most Lucene Query implementations will visit matching docIDs in order. However, some queries (currently limited to certain cases of {@link BooleanQuery}) can achieve faster searching if the Collector allows them to deliver the docIDs out of order. +

Many collectors don't mind getting docIDs out of order, so it's important to return true here. +

+ +
+ + The priority queue which holds the top documents. Note that different implementations of PriorityQueue give different meaning to 'top documents'. HitQueue for example aggregates the top scoring documents, while other PQ implementations may hold documents sorted by other criteria. + + + + The total number of documents that the collector encountered. + + + Populates the results array with the ScoreDoc instances. This can be overridden in case a different ScoreDoc type should be returned. + + + + Returns a {@link TopDocs} instance containing the given results. If results is null it means there are no results to return, either because there were 0 calls to collect() or because the arguments to topDocs were invalid. + + + + The total number of documents that matched this query. + + + Returns the top docs that were collected by this collector. + + + Returns the documents in the range [start .. pq.size()) that were collected by this collector. Note that if start >= pq.size(), an empty TopDocs is returned.
+ This method is convenient to call if the application always asks for the last results, starting from the last 'page'.
+ NOTE: you cannot call this method more than once for each search + execution. If you need to call it more than once, passing each time a + different start, you should call {@link #TopDocs()} and work + with the returned {@link TopDocs} object, which will contain all the + results this search execution collected. +
+
+ + Returns the documents in the range [start .. start+howMany) that were collected by this collector. Note that if start >= pq.size(), an empty TopDocs is returned, and if pq.size() - start < howMany, then only the available documents in [start .. pq.size()) are returned.
+ This method is useful when the search application allows pagination of search results; it also optimizes memory use by allocating only as much space as howMany requests.
+ NOTE: you cannot call this method more than once for each search + execution. If you need to call it more than once, passing each time a + different range, you should call {@link #TopDocs()} and work with the + returned {@link TopDocs} object, which will contain all the results this + search execution collected. +
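+ A hedged sketch of paging with this overload (collector, searcher and query are placeholders; note that topDocs may only be called once per search execution):
+            TopScoreDocCollector collector = TopScoreDocCollector.create(30, true);
+            searcher.search(query, collector);
+            // third page of 10 hits; topDocs(start, howMany) is called only once
+            TopDocs page = collector.topDocs(20, 10);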
+
+ + A Scorer for queries with a required subscorer + and an excluding (prohibited) sub DocIdSetIterator. +
+ This Scorer implements {@link Scorer#SkipTo(int)}, + and it uses the skipTo() on the given scorers. +
+
+ + Expert: Common scoring functionality for different types of queries. + +

+ A Scorer iterates over documents matching a + query in increasing order of doc Id. +

+

+ Document scores are computed using a given Similarity + implementation. +

+ +

NOTE: The values Float.Nan, + Float.NEGATIVE_INFINITY and Float.POSITIVE_INFINITY are + not valid scores. Certain collectors (eg {@link + TopScoreDocCollector}) will not properly collect hits + with these scores. + +

+ + +
+ + Constructs a Scorer. + The Similarity implementation used by this scorer. + + + + Returns the Similarity implementation used by this scorer. + + + Scores and collects all matching documents. + The collector to which all matching documents are passed through + {@link HitCollector#Collect(int, float)}. +
When this method is used the {@link #Explain(int)} method should not be used. + + use {@link #Score(Collector)} instead. + +
+ + Scores and collects all matching documents. + The collector to which all matching documents are passed. +
When this method is used the {@link #Explain(int)} method should not be used. + +
+ + Expert: Collects matching documents in a range. Hook for optimization. + Note that {@link #Next()} must be called once before this method is called + for the first time. + + The collector to which all matching documents are passed through + {@link HitCollector#Collect(int, float)}. + + Do not score documents past this. + + true if more matching documents may remain. + + use {@link #Score(Collector, int, int)} instead. + + + + Expert: Collects matching documents in a range. Hook for optimization. + Note, firstDocID is added to ensure that {@link #NextDoc()} + was called before this method. + + + The collector to which all matching documents are passed. + + Do not score documents past this. + + + The first document ID (ensures {@link #NextDoc()} is called before + this method. + + true if more matching documents may remain. + + + + Returns the score of the current document matching the query. + Initially invalid, until {@link #Next()} or {@link #SkipTo(int)} + is called the first time, or when called from within + {@link Collector#collect}. + + + + Returns an explanation of the score for a document. +
When this method is used, the {@link #Next()}, {@link #SkipTo(int)} and + {@link #Score(HitCollector)} methods should not be used. +
+ The document number for the explanation. + + + Please use {@link IndexSearcher#explain} + or {@link Weight#explain} instead. + +
+ + Construct a ReqExclScorer. + The scorer that must match, except where + + indicates exclusion. + + + + use {@link #NextDoc()} instead. + + + + Advance to non excluded doc. +
On entry: +
  • reqScorer != null,
  • exclScorer != null,
  • reqScorer was advanced once via next() or skipTo() and reqScorer.doc() may still be excluded.
+ Advances reqScorer to a non-excluded required doc, if any. +
+ true iff there is a non excluded required doc. + +
+ + use {@link #DocID()} instead. + + + + Returns the score of the current document matching the query. + Initially invalid, until {@link #Next()} is called the first time. + + The score of the required scorer. + + + + use {@link #Advance(int)} instead. + + + + A Query that matches documents containing terms with a specified prefix. A PrefixQuery + is built by QueryParser for input like app*. + +

This query uses the {@link + MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + rewrite method. +

+
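+ For example, the query produced for the input app* could also be constructed directly (the field name is a placeholder):
+            Query query = new PrefixQuery(new Term("name", "app"));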
+ + Constructs a query for terms starting with prefix. + + + Returns the prefix of this query. + + + Prints a user-readable version of this query. + + + A Filter that restricts search results to values that have a matching prefix in a given + field. + + + + A wrapper for {@link MultiTermQuery}, that exposes its + functionality as a {@link Filter}. +

+ MultiTermQueryWrapperFilter is not designed to + be used by itself. Normally you subclass it to provide a Filter + counterpart for a {@link MultiTermQuery} subclass. +

+ For example, {@link TermRangeFilter} and {@link PrefixFilter} extend + MultiTermQueryWrapperFilter. + This class also provides the functionality behind + {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}; + this is why it is not abstract. +

+
+ + Wrap a {@link MultiTermQuery} as a Filter. + + + Expert: Return the number of unique terms visited during execution of the filter. + If there are many of them, you may consider using another filter type + or optimize your total term count in index. +

This method is not thread safe, be sure to only call it when no filter is running! + If you re-use the same filter instance for another + search, be sure to first reset the term counter + with {@link #clearTotalNumberOfTerms}. +

+ + +
+ + Expert: Resets the counting of unique terms. + Do this before executing the filter. + + + + + + Returns a BitSet with true for documents which should be + permitted in search results, and false for those that should + not. + + Use {@link #GetDocIdSet(IndexReader)} instead. + + + + Returns a DocIdSet with documents that should be + permitted in search results. + + + + Prints a user-readable version of this query. + + + Returns the maximum payload score seen, else 1 if there are no payloads on the doc. +

+ Is thread safe and completely reusable. + + +

+
+ + An abstract class that defines a way for Payload*Query instances + to transform the cumulative effects of payload scores for a document. + + + for more information + +

+ This class and its derivations are experimental and subject to change + + + + + +

Calculate the score up to this point for this doc and field + The current doc + + The field + + The start position of the matching Span + + The end position of the matching Span + + The number of payloads seen so far + + The current score so far + + The score for the current payload + + The new current Score + + + + +
+ + Calculate the final score for all the payloads seen so far for this doc/field + The current doc + + The current field + + The total number of payloads seen on this document + + The raw score for those payloads + + The final score for the payloads + + + + A ranked list of documents, used to hold search results. +

+ Caution: Iterate only over the hits needed. Iterating over all hits is + generally not desirable and may be the source of performance issues. If you + need to iterate over many or all hits, consider using the search method that + takes a {@link HitCollector}. +

+

+ Note: Deleting matching documents concurrently with traversing the + hits, might, when deleting hits that were not yet retrieved, decrease + {@link #Length()}. In such case, + {@link java.util.ConcurrentModificationException + ConcurrentModificationException} is thrown when accessing hit n + > current_{@link #Length()} (but n < {@link #Length()} + _at_start). + +

+ see {@link Searcher#Search(Query, int)}, + {@link Searcher#Search(Query, Filter, int)} and + {@link Searcher#Search(Query, Filter, int, Sort)}:
+ +
+            TopDocs topDocs = searcher.Search(query, numHits);
+            ScoreDoc[] hits = topDocs.scoreDocs;
+            for (int i = 0; i < hits.Length; i++) {
+              int docId = hits[i].doc;
+              Document d = searcher.Doc(docId);
+              // do something with current hit
+              ...
+            }
+            
+
+
+ + Tries to add new documents to hitDocs. + Ensures that the hit numbered min has been retrieved. + + + + Returns the total number of hits available in this set. + + + Returns the stored fields of the nth document in this set. +

Documents are cached, so that repeated requests for the same element may + return the same Document object. +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Returns the score for the nth document in this set. + + + Returns the id for the nth document in this set. + Note that ids may change when the index changes, so you cannot + rely on the id to be stable. + + + + Returns a {@link HitIterator} to navigate the Hits. Each item returned + from {@link Iterator#next()} is a {@link Hit}. +

+ Caution: Iterate only over the hits needed. Iterating over all + hits is generally not desirable and may be the source of + performance issues. If you need to iterate over many or all hits, consider + using a search method that takes a {@link HitCollector}. +

+

+
+ + Implements the fuzzy search query. The similarity measurement + is based on the Levenshtein (edit distance) algorithm. + + Warning: this query is not very scalable with its default prefix + length of 0 - in this case, *every* term will be enumerated and + cause an edit score calculation. + + + + + Create a new FuzzyQuery that will match terms with a similarity + of at least minimumSimilarity to term. + If a prefixLength > 0 is specified, a common prefix + of that length is also required. + + + the term to search for + + a value between 0 and 1 to set the required similarity + between the query term and the matching terms. For example, for a + minimumSimilarity of 0.5 a term of the same length + as the query term is considered similar to the query term if the edit distance + between both terms is less than length(term)*0.5 + + length of common (non-fuzzy) prefix + + IllegalArgumentException if minimumSimilarity is >= 1 or < 0 + or if prefixLength < 0 + + + + Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}. + + + Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}. + + + Returns the minimum similarity that is required for this query to match. + float value between 0.0 and 1.0 + + + + Returns the non-fuzzy prefix length. This is the number of characters at the start + of a term that must be identical (not fuzzy) to the query term if the query + is to match that term. + + + + Returns the pattern term. + + + Abstract decorator class for a DocIdSet implementation + that provides on-demand filtering/validation + mechanism on a given DocIdSet. + +

+ + Technically, this same functionality could be achieved + with ChainedFilter (under contrib/misc), however the + benefit of this class is it never materializes the full + bitset for the filter. Instead, the {@link #match} + method is invoked on-demand, per docID visited during + searching. If you know few docIDs will be visited, and + the logic behind {@link #match} is relatively costly, + this may be a better way to filter than ChainedFilter. + +

+ + +
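+ A minimal sketch of such a subclass, assuming an existing DocIdSet named innerSet and an arbitrary per-document test:
+            DocIdSet filtered = new FilteredDocIdSet(innerSet) {
+              protected boolean match(int docid) {
+                // keep only even document ids (placeholder logic)
+                return (docid & 1) == 0;
+              }
+            };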
+ + Constructor. + Underlying DocIdSet + + + + This DocIdSet implementation is cacheable if the inner set is cacheable. + + + Validation method to determine whether a docid should be in the result set. + docid to be tested + + true if input docid should be in the result set, false otherwise. + + + + Implementation of the contract to build a DocIdSetIterator. + + + + + + + Abstract decorator class of a DocIdSetIterator + implementation that provides on-demand filter/validation + mechanism on an underlying DocIdSetIterator. See {@link + FilteredDocIdSet}. + + + + Constructor. + Underlying DocIdSetIterator. + + + + Validation method to determine whether a docid should be in the result set. + docid to be tested + + true if input docid should be in the result set, false otherwise. + + + + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + Expert: Collects sorted results from Searchable's and collates them. + The elements put into this queue must be of type FieldDoc. + +

Created: Feb 11, 2004 2:04:21 PM + +

+ lucene 1.4 + + $Id: FieldDocSortedHitQueue.java 695514 2008-09-15 15:42:11Z otis $ + +
+ + Creates a hit queue sorted by the given list of fields. + Fieldable names, in priority order (highest priority first). + + The number of hits to retain. Must be greater than zero. + + + + Allows redefinition of sort fields if they are null. + This is to handle the case using ParallelMultiSearcher where the + original list contains AUTO and we don't know the actual sort + type until the values come back. The fields can only be set once. + This method is thread safe. + + + + + + Returns the fields being used to sort. + + + Returns an array of collators, possibly null. The collators + correspond to any SortFields which were given a specific locale. + + Array of sort fields. + + Array, possibly null. + + + + Returns whether a is less relevant than b. + ScoreDoc + + ScoreDoc + + true if document a should be sorted after document b. + + + + A query that wraps a filter and simply returns a constant score equal to the + query boost for every document in the filter. + + + + $Id: ConstantScoreQuery.java 807180 2009-08-24 12:26:43Z markrmiller $ + + + + Returns the encapsulated filter + + + Prints a user-readable version of this query. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + use {@link #NextDoc()} instead. + + + + use {@link #DocID()} instead. + + + + use {@link #Advance(int)} instead. + + + + A QueryParser which constructs queries to search multiple fields. + + + $Revision: 829134 $ + + + + This class is generated by JavaCC. The most important method is + {@link #Parse(String)}. + + The syntax for query strings is as follows: + A Query is a series of clauses. + A clause may be prefixed by: +
  • a plus (+) or a minus (-) sign, indicating that the clause is required or prohibited respectively; or
  • a term followed by a colon, indicating the field to be searched. This enables one to construct queries which search multiple fields.
+ + A clause may be either: +
  • a term, indicating all the documents that contain this term; or
  • a nested query, enclosed in parentheses. Note that this may be used with a +/- prefix to require any of a set of terms.
+ + Thus, in BNF, the query grammar is: +
+            Query  ::= ( Clause )*
+            Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+            
+ +

+ Examples of appropriately formatted queries can be found in the query syntax + documentation. +
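+ As a brief illustrative sketch (field name, analyzer and query text are placeholders; parse can throw ParseException):
+            QueryParser parser = new QueryParser(Version.LUCENE_29, "contents",
+                new StandardAnalyzer(Version.LUCENE_29));
+            Query query = parser.parse("+lucene +(title:search body:index*)");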

+ +

+ In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g. + date:[6/1/2005 TO 6/4/2005] produces a range query that searches + for "date" fields between 2005-06-01 and 2005-06-04. Note that the format + of the accepted input depends on {@link #SetLocale(Locale) the locale}. + By default a date is converted into a search term using the deprecated + {@link DateField} for compatibility reasons. + To use the new {@link DateTools} to convert dates, a + {@link Lucene.Net.Documents.DateTools.Resolution} has to be set. +

+

+ The date resolution that shall be used for RangeQueries can be set + using {@link #SetDateResolution(DateTools.Resolution)} + or {@link #SetDateResolution(String, DateTools.Resolution)}. The former + sets the default date resolution for all fields, whereas the latter can + be used to set field specific date resolutions. Field specific date + resolutions take, if set, precedence over the default date resolution. +
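+ For example, the resolutions might be configured like this (the field name and the chosen resolutions are placeholders):
+            parser.setDateResolution(DateTools.Resolution.DAY);
+            parser.setDateResolution("modified", DateTools.Resolution.HOUR);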

+

+ If you use neither {@link DateField} nor {@link DateTools} in your + index, you can create your own + query parser that inherits QueryParser and overwrites + {@link #GetRangeQuery(String, String, String, boolean)} to + use a different method for date conversion. +

+ +

Note that QueryParser is not thread-safe.

+ +

NOTE: there is a new QueryParser in contrib, which matches + the same syntax as this class, but is more modular, + enabling substantial customization to how a query is created. + +

NOTE: there is a new QueryParser in contrib, which matches + the same syntax as this class, but is more modular, + enabling substantial customization to how a query is created. + NOTE: You must specify the required {@link Version} compatibility when + creating QueryParser: +

  • As of 2.9, {@link #SetEnablePositionIncrements} is true by default.
+
+
+ + Token literal values and constants. + Generated by org.javacc.parser.OtherFilesGen#start() + + + + End of File. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + RegularExpression Id. + + + Lexical state. + + + Lexical state. + + + Lexical state. + + + Lexical state. + + + Literal token values. + + + Alternative form of QueryParser.Operator.AND + + + Alternative form of QueryParser.Operator.OR + + + The actual operator that parser uses to combine query terms + + + Constructs a query parser. + the default field for query terms. + + used to find terms in the query text. + + Use {@link #QueryParser(Version, String, Analyzer)} instead + + + + Constructs a query parser. + + + Lucene version to match. See above) + + the default field for query terms. + + used to find terms in the query text. + + + + Parses a query string, returning a {@link Lucene.Net.Search.Query}. + the query string to be parsed. + + ParseException if the parsing fails + + + Returns the analyzer. + + + + Returns the field. + + + + Get the minimal similarity for fuzzy queries. + + + Set the minimum similarity for fuzzy queries. + Default is 0.5f. + + + + Get the prefix length for fuzzy queries. + Returns the fuzzyPrefixLength. + + + + Set the prefix length for fuzzy queries. Default is 0. + The fuzzyPrefixLength to set. + + + + Sets the default slop for phrases. If zero, then exact phrase matches + are required. Default value is zero. + + + + Gets the default slop for phrases. + + + Set to true to allow leading wildcard characters. +

+ When set, * or ? are allowed as + the first character of a PrefixQuery and WildcardQuery. + Note that this can produce very slow + queries on big indexes. +

+ Default: false. +

+
+ + + + + + Set to true to enable position increments in result query. +

+ When set, result phrase and multi-phrase queries will + be aware of position increments. + Useful when e.g. a StopFilter increases the position increment of + the token that follows an omitted token. +

+ Default: false. +

+
+ + + + + + Sets the boolean operator of the QueryParser. + In default mode (OR_OPERATOR) terms without any modifiers + are considered optional: for example capital of Hungary is equal to + capital OR of OR Hungary.
+ In AND_OPERATOR mode terms are considered to be in conjunction: the + above mentioned query is parsed as capital AND of AND Hungary +
+
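+ For example, to require all terms by default (a sketch; parser is a placeholder):
+            parser.setDefaultOperator(QueryParser.AND_OPERATOR);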
+ + Gets implicit operator setting, which will be either AND_OPERATOR + or OR_OPERATOR. + + + + Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + lower-cased or not. Default is true. + + + + + + + + Please use {@link #setMultiTermRewriteMethod} instead. + + + + Please use {@link #getMultiTermRewriteMethod} instead. + + + + By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + a) Runs faster b) Does not have the scarcity of terms unduly influence score + c) avoids any "TooManyBooleanClauses" exception. + However, if your application really needs to use the + old-fashioned BooleanQuery expansion rewriting and the above + points are not relevant then use this to change + the rewrite method. + + + + + + + + Set locale used by date range parsing. + + + Returns current locale, allowing access by subclasses. + + + Sets the default date resolution used by RangeQueries for fields for which no + specific date resolutions has been set. Field specific resolutions can be set + with {@link #SetDateResolution(String, DateTools.Resolution)}. + + + the default date resolution to set + + + + Sets the date resolution used by RangeQueries for a specific field. + + + field for which the date resolution is to be set + + date resolution to set + + + + Returns the date resolution that is used by RangeQueries for the given field. + Returns null, if no default or field specific date resolution has been set + for the given field. + + + + + Sets the collator used to determine index term inclusion in ranges + for RangeQuerys. +

+ WARNING: Setting the rangeCollator to a non-null + collator using this method will cause every single index Term in the + Field referenced by lowerTerm and/or upperTerm to be examined. + Depending on the number of index Terms in this Field, the operation could + be very slow. + +

+ the collator to use when constructing RangeQuerys + +
+ + the collator used to determine index term inclusion in ranges + for RangeQuerys. + + + + use {@link #AddClause(List, int, int, Query)} instead. + + + + throw in overridden method to disallow + + + + Base implementation delegates to {@link #GetFieldQuery(String,String)}. + This method may be overridden, for example, to return + a SpanNearQuery instead of a PhraseQuery. + + + throw in overridden method to disallow + + + + throw in overridden method to disallow + + + + Builds a new BooleanQuery instance + disable coord + + new BooleanQuery instance + + + + Builds a new BooleanClause instance + sub query + + how this clause should occur when matching documents + + new BooleanClause instance + + + + Builds a new TermQuery instance + term + + new TermQuery instance + + + + Builds a new PhraseQuery instance + new PhraseQuery instance + + + + Builds a new MultiPhraseQuery instance + new MultiPhraseQuery instance + + + + Builds a new PrefixQuery instance + Prefix term + + new PrefixQuery instance + + + + Builds a new FuzzyQuery instance + Term + + minimum similarity + + prefix length + + new FuzzyQuery Instance + + + + Builds a new TermRangeQuery instance + Field + + min + + max + + true if range is inclusive + + new TermRangeQuery instance + + + + Builds a new MatchAllDocsQuery instance + new MatchAllDocsQuery instance + + + + Builds a new WildcardQuery instance + wildcard term + + new WildcardQuery instance + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. + + + List that contains {@link BooleanClause} instances + to join. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + use {@link #GetBooleanQuery(List)} instead + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. + + + List that contains {@link BooleanClause} instances + to join. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. + + + List that contains {@link BooleanClause} instances + to join. + + true if coord scoring should be disabled. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + use {@link #GetBooleanQuery(List, boolean)} instead + + + + Factory method for generating query, given a set of clauses. + By default creates a boolean query composed of clauses passed in. + + Can be overridden by extending classes, to modify query being + returned. + + + List that contains {@link BooleanClause} instances + to join. + + true if coord scoring should be disabled. + + + Resulting {@link Query} object. + + throw in overridden method to disallow + + + + Factory method for generating a query. Called when parser + parses an input term token that contains one or more wildcard + characters (? and *), but is not a prefix term token (one + that has just a single * character at the end) +

+ Depending on settings, prefix term may be lower-cased + automatically. It will not go through the default Analyzer, + however, since normal Analyzers are unlikely to work properly + with wildcard templates. +

+ Can be overridden by extending classes, to provide custom handling for + wildcard queries, which may be necessary due to missing analyzer calls. + +

+ Name of the field query will use. + + Term token that contains one or more wild card + characters (? or *), but is not simple prefix term + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + +
+ + Factory method for generating a query (similar to + {@link #getWildcardQuery}). Called when parser parses an input term + token that uses prefix notation; that is, contains a single '*' wildcard + character as its last character. Since this is a special case + of generic wildcard term, and such a query can be optimized easily, + this usually results in a different query object. +

+ Depending on settings, a prefix term may be lower-cased + automatically. It will not go through the default Analyzer, + however, since normal Analyzers are unlikely to work properly + with wildcard templates. +

+ Can be overridden by extending classes, to provide custom handling for + wild card queries, which may be necessary due to missing analyzer calls. + +

+ Name of the field query will use. + + Term token to use for building term for the query + (without trailing '*' character!) + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + +
+ + Factory method for generating a query (similar to + {@link #getWildcardQuery}). Called when parser parses + an input term token that has the fuzzy suffix (~) appended. + + + Name of the field query will use. + + Term token to use for building term for the query + + + Resulting {@link Query} built for the term + + throw in overridden method to disallow + + + + Returns a String where the escape char has been + removed, or kept only once if there was a double escape. + + Supports escaped unicode characters, e. g. translates + \\u0041 to A. + + + + + Returns the numeric value of the hexadecimal character + + + Returns a String where those characters that QueryParser + expects to be escaped are escaped by a preceding \. + + + + Command line tool to test QueryParser, using {@link Lucene.Net.Analysis.SimpleAnalyzer}. + Usage:
+ java Lucene.Net.QueryParsers.QueryParser <input> +
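+ (The command-line form above is inherited from the Java original; from C# the equivalent is simply constructing a QueryParser and calling Parse on the input string, as sketched elsewhere in these notes.)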
+
+ + Generated Token Manager. + + + Current token. + + + Next token. + + + Constructor with user supplied CharStream. + + + Reinitialise. + + + Constructor with generated Token Manager. + + + Reinitialise. + + + Get the next Token. + + + Get the specific Token. + + + Generate ParseException. + + + Enable tracing. + + + Disable tracing. + + + The default operator for parsing queries. + Use {@link QueryParser#setDefaultOperator} to change it. + + + + Creates a MultiFieldQueryParser. Allows passing of a map with term to + Boost, and the boost to apply to each term. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ When you pass a boost (title=>5 body=>10) you can get +

+ + + +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +

+ +

+ Please use + {@link #MultiFieldQueryParser(Version, String[], Analyzer, Map)} + instead + +
+ + Creates a MultiFieldQueryParser. Allows passing of a map with term to + Boost, and the boost to apply to each term. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ When you pass a boost (title=>5 body=>10) you can get +

+ + + +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +

+

+
+ + Creates a MultiFieldQueryParser. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +

+ +

+ Please use + {@link #MultiFieldQueryParser(Version, String[], Analyzer)} + instead + +
+ + Creates a MultiFieldQueryParser. + +

+ It will, when parse(String query) is called, construct a query like this + (assuming the query consists of two terms and you specify the two fields + title and body): +

+ + + (title:term1 body:term1) (title:term2 body:term2) + + +

+ When setDefaultOperator(AND_OPERATOR) is set, the result will be: +

+ + + +(title:term1 body:term1) +(title:term2 body:term2) + + +

+ In other words, all the query's terms must appear, but it doesn't matter + in what fields they appear. +

+

+
+ + Parses a query which searches on the fields specified. +

+ If x fields are specified, this effectively constructs: + +

+            <code>
+            (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+            </code>
+            
+ +
+ Queries strings to parse + + Fields to search on + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the queries array differs from the length of + the fields array + + Use {@link #Parse(Version,String[],String[],Analyzer)} + instead + +
+ + Parses a query which searches on the fields specified. +

+ If x fields are specified, this effectively constructs: + +

+            <code>
+            (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+            </code>
+            
+ +
+ Lucene version to match; this is passed through to + QueryParser. + + Queries strings to parse + + Fields to search on + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the queries array differs from the length of + the fields array + +
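+ A minimal C# sketch of this overload (the analyzer choice and Version constant are illustrative):
+            <code>
+            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
+            string[] queries = { "lucene", "search engine" };
+            string[] fields  = { "title", "body" };
+            // Parses queries[i] against fields[i] and combines the per-field clauses.
+            Query q = MultiFieldQueryParser.Parse(Version.LUCENE_29, queries, fields, analyzer);
+            </code>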
+ + Parses a query, searching on the fields specified. + Use this if you need to specify certain fields as required, + and others as prohibited. +

+            Usage:
+            
+            String[] fields = {"filename", "contents", "description"};
+            BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+            BooleanClause.Occur.MUST,
+            BooleanClause.Occur.MUST_NOT};
+            MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+            
+            
+

+ The code above would construct a query: +

+            
+            (filename:query) +(contents:query) -(description:query)
+            
+            
+ +
+ Query string to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException if query parsing fails + IllegalArgumentException if the length of the fields array differs + from the length of the flags array + + Use + {@link #Parse(Version, String, String[], BooleanClause.Occur[], Analyzer)} + instead + +
+ + Parses a query, searching on the fields specified. Use this if you need + to specify certain fields as required, and others as prohibited. +

+ +

+            Usage:
+            <code>
+            String[] fields = {"filename", "contents", "description"};
+            BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+            BooleanClause.Occur.MUST,
+            BooleanClause.Occur.MUST_NOT};
+            MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+            </code>
+            
+

+ The code above would construct a query: + +

+            <code>
+            (filename:query) +(contents:query) -(description:query)
+            </code>
+            
+ +
+ Lucene version to match; this is passed through to + QueryParser. + + Query string to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the fields array differs from the length of + the flags array + +
+ + Parses a query, searching on the fields specified. + Use this if you need to specify certain fields as required, + and others as prohibited. +

+            Usage:
+            
+            String[] query = {"query1", "query2", "query3"};
+            String[] fields = {"filename", "contents", "description"};
+            BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+            BooleanClause.Occur.MUST,
+            BooleanClause.Occur.MUST_NOT};
+            MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+            
+            
+

+ The code above would construct a query: +

+            
+            (filename:query1) +(contents:query2) -(description:query3)
+            
+            
+ +
+ Queries strings to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException if query parsing fails + IllegalArgumentException if the length of the queries, fields, + and flags array differ + + Use + {@link #Parse(Version, String[], String[], BooleanClause.Occur[], Analyzer)} + instead + +
+ + Parses a query, searching on the fields specified. Use this if you need + to specify certain fields as required, and others as prohibited. +

+ +

+            Usage:
+            <code>
+            String[] query = {"query1", "query2", "query3"};
+            String[] fields = {"filename", "contents", "description"};
+            BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+            BooleanClause.Occur.MUST,
+            BooleanClause.Occur.MUST_NOT};
+            MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+            </code>
+            
+

+ The code above would construct a query: + +

+            <code>
+            (filename:query1) +(contents:query2) -(description:query3)
+            </code>
+            
+ +
+ Lucene version to match; this is passed through to + QueryParser. + + Queries strings to parse + + Fields to search on + + Flags describing the fields + + Analyzer to use + + ParseException + if query parsing fails + + IllegalArgumentException + if the length of the queries, fields, and flags array differ + +
+ + An efficient implementation of JavaCC's CharStream interface.

Note that + this does not do line-number counting, but instead keeps track of the + character position of the token in the input, as required by Lucene's {@link + Lucene.Net.Analysis.Token} API. + +

+
+ + This interface describes a character stream that maintains line and + column number positions of the characters. It also has the capability + to backup the stream to some extent. An implementation of this + interface is used in the TokenManager implementation generated by + JavaCCParser. + + All the methods except backup can be implemented in any fashion. backup + needs to be implemented correctly for the correct operation of the lexer. + The rest of the methods are used to get information like line number, + column number and the String that constitutes a token and are not used + by the lexer. Hence their implementation won't affect the generated lexer's + operation. + + + + Returns the next character from the selected input. The method + of selecting the input is the responsibility of the class + implementing this interface. Can throw any java.io.IOException. + + + + Returns the column position of the character last read. + + + + + + + Returns the line number of the character last read. + + + + + + + Returns the column number of the last character for the current token (being + matched after the last call to BeginToken). + + + + Returns the line number of the last character for the current token (being + matched after the last call to BeginToken). + + + + Returns the column number of the first character for the current token (being + matched after the last call to BeginToken). + + + + Returns the line number of the first character for the current token (being + matched after the last call to BeginToken). + + + + Backs up the input stream by amount steps. The lexer calls this method if it + had already read some characters, but could not use them to match a + (longer) token. So, they will be used again as the prefix of the next + token and it is the implementation's responsibility to do this right. + + + + Returns the next character that marks the beginning of the next token. + All characters must remain in the buffer between two successive calls + to this method to implement backup correctly. + + + + Returns a string made up of characters from the marked token beginning + to the current buffer position. Implementations have the choice of returning + anything that they want to. For example, for efficiency, one might decide + to just return null, which is a valid implementation. + + + + Returns an array of characters that make up the suffix of length 'len' for + the currently matched token. This is used to build up the matched string + for use in actions in the case of MORE. A simple and inefficient + implementation of this is as follows: + + { + String t = GetImage(); + return t.substring(t.length() - len, t.length()).toCharArray(); + } + + + + The lexer calls this function to indicate that it is done with the stream + and hence implementations can free any resources held by this class. + Again, the body of this function can be just empty and it will not + affect the lexer's operation. + + + + Constructs from a Reader. + + + This class implements {@link InvertedDocConsumer}, which + is passed each token produced by the analyzer on each + field. It stores these tokens in a hash table, and + allocates separate byte streams per token. Consumers of + this class, e.g. {@link FreqProxTermsWriter} and {@link + TermVectorsTermsWriter}, write their own byte streams + under each term. 
+ + + + Add a new thread + + + Abort (called after hitting AbortException) + + + Flush a new segment + + + Close doc stores + + + Attempt to free RAM, returning true if any RAM was + freed + + + + TermPositions provides an interface for enumerating the <document, + frequency, <position>* > tuples for a term.

The document and + frequency are the same as for a TermDocs. The positions portion lists the ordinal + positions of each occurrence of a term in a document. + +

+ + +
+ + TermDocs provides an interface for enumerating <document, frequency> + pairs for a term.

The document portion names each document containing + the term. Documents are indicated by number. The frequency portion gives + the number of times the term occurred in each document.

The pairs are + ordered by document number. +

+ + +
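+ For illustration, a minimal C# sketch of the enumeration contract (assumes an already-open IndexReader named reader; method names follow the 2.9-era API described here):
+            <code>
+            TermDocs termDocs = reader.TermDocs(new Term("body", "lucene"));
+            try
+            {
+                while (termDocs.Next())
+                {
+                    int doc = termDocs.Doc();   // document number containing the term
+                    int freq = termDocs.Freq(); // occurrences of the term in that document
+                }
+            }
+            finally
+            {
+                termDocs.Close();
+            }
+            </code>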
+ + Sets this to the data for a term. + The enumeration is reset to the start of the data for this term. + + + + Sets this to the data for the current term in a {@link TermEnum}. + This may be optimized in some implementations. + + + + Returns the current document number.

This is invalid until {@link + #Next()} is called for the first time. +

+
+ + Returns the frequency of the term within the current document.

This + is invalid until {@link #Next()} is called for the first time. +

+
+ + Moves to the next pair in the enumeration.

Returns true iff there is + such a next pair in the enumeration. +

+
+ + Attempts to read multiple entries from the enumeration, up to length of + docs. Document numbers are stored in docs, and term + frequencies are stored in freqs. The freqs array must be as + long as the docs array. + +

Returns the number of entries read. Zero is only returned when the + stream has been exhausted. +

+
+ + Skips entries to the first beyond the current whose document number is + greater than or equal to target.

Returns true iff there is such + an entry.

Behaves as if written:

+            boolean skipTo(int target) {
+            do {
+            if (!next())
+            return false;
+            } while (target > doc());
+            return true;
+            }
+            
+ Some implementations are considerably more efficient than that. +
+
+ + Frees associated resources. + + + Returns next position in the current document. It is an error to call + this more than {@link #Freq()} times + without calling {@link #Next()}

This is + invalid until {@link #Next()} is called for + the first time. +

+
+ + Returns the length of the payload at the current term position. + This is invalid until {@link #NextPosition()} is called for + the first time.
+
+ length of the current payload in number of bytes + +
+ + Returns the payload data at the current term position. + This is invalid until {@link #NextPosition()} is called for + the first time. + This method must not be called more than once after each call + of {@link #NextPosition()}. However, payloads are loaded lazily, + so if the payload data for the current position is not needed, + this method may not be called at all for performance reasons.
+ +
+ the array into which the data of this payload is to be + stored, if it is big enough; otherwise, a new byte[] array + is allocated for this purpose. + + the offset in the array into which the data of this payload + is to be stored. + + a byte[] array containing the data of this payload + + IOException +
+ + Checks if a payload can be loaded at this position. +

+ Payloads can only be loaded once per call to + {@link #NextPosition()}. + +

+ true if there is a payload available at this position that can be loaded + +
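+ A hedged C# sketch tying the position and payload calls together (assumes an open IndexReader named reader and that the field was indexed with payloads):
+            <code>
+            TermPositions positions = reader.TermPositions(new Term("body", "lucene"));
+            while (positions.Next())
+            {
+                int freq = positions.Freq();
+                for (int i = 0; i < freq; i++)
+                {
+                    int position = positions.NextPosition();
+                    if (positions.IsPayloadAvailable())
+                    {
+                        // GetPayload may be called at most once per NextPosition().
+                        byte[] payload = positions.GetPayload(new byte[positions.GetPayloadLength()], 0);
+                    }
+                }
+            }
+            positions.Close();
+            </code>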
+ + A Term represents a word from text. This is the unit of search. It is + composed of two elements, the text of the word, as a string, and the name of + the field that the text occurred in, an interned string. + Note that terms may represent more than words from text fields, but also + things like dates, email addresses, urls, etc. + + + + Constructs a Term with the given field and text. +

Note that a null field or null text value results in undefined + behavior for most Lucene APIs that accept a Term parameter. +

+
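+ For example, a minimal C# sketch of constructing and inspecting a Term (field and text values are illustrative):
+            <code>
+            Term term = new Term("title", "lucene");
+            string field = term.Field();  // "title"
+            string text  = term.Text();   // "lucene"
+            </code>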
+ + Constructs a Term with the given field and empty text. + This serves two purposes: 1) reuse of a Term with the same field. + 2) pattern for a query. + + + + + + + Returns the field of this term, an interned string. The field indicates + the part of a document which this term came from. + + + + Returns the text of this term. In the case of words, this is simply the + text of the word. In the case of dates and other types, this is an + encoding of the object as a string. + + + + Optimized construction of new Terms by reusing same field as this Term + - avoids field.intern() overhead + + The text of the new term (field is implicitly same as this Term instance) + + A new Term + + + + Compares two terms, returning a negative integer if this + term belongs before the argument, zero if this term is equal to the + argument, and a positive integer if this term belongs after the argument. + The ordering of terms is first by field, then by text. + + + + Resets the field and text of a Term. + + + This exception is thrown when an {@link IndexReader} + tries to make changes to the index (via {@link + IndexReader#deleteDocument}, {@link + IndexReader#undeleteAll} or {@link IndexReader#setNorm}) + but changes have already been committed to the index + since this reader was instantiated. When this happens + you must open a new reader on the current index to make + the changes. + + + + For each Field, store position by position information. It ignores frequency information +

+ This is not thread-safe. +

+
+ + A Map of Integer and TVPositionInfo + + + + + + + + Never ignores positions. This mapper doesn't make much sense unless there are positions + false + + + + Callback for the TermVectorReader. + + + + + + + + + + + Callback mechanism used by the TermVectorReader + The field being read + + The number of terms in the vector + + Whether offsets are available + + Whether positions are available + + + + Get the mapping between fields and terms, sorted by the comparator + + + A map between field names and a Map. The sub-Map key is the position as the integer, the value is {@link Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo}. + + + + Container for a term at a position + + + + The position of the term + + + + Note, there may be multiple terms at the same position + A List of Strings + + + + Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple entries since there may be multiple terms at a position + A List of TermVectorOffsetInfo objects, if offsets are store. + + + + An IndexReader which reads multiple, parallel indexes. Each index added + must have the same number of documents, but typically each contains + different fields. Each document contains the union of the fields of all + documents with the same document number. When searching, matches for a + query term are from the first index added that has the field. + +

This is useful, e.g., with collections that have large fields which + change rarely and small fields that change more frequently. The smaller + fields may be re-indexed in a new index and both indexes may be searched + together. + +

Warning: It is up to you to make sure all indexes + are created and modified the same way. For example, if you add + documents to one index, you need to add the same documents in the + same order to the other indexes. Failure to do so will result in + undefined behavior. +

+
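+ A hedged C# sketch of the pattern described above (largeFieldsDir and smallFieldsDir are placeholder Directory instances; both indexes must contain the same documents in the same order):
+            <code>
+            ParallelReader parallel = new ParallelReader();
+            parallel.Add(IndexReader.Open(largeFieldsDir, true));  // rarely changing fields
+            parallel.Add(IndexReader.Open(smallFieldsDir, true));  // frequently re-indexed fields
+            IndexSearcher searcher = new IndexSearcher(parallel);
+            </code>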
+ + Construct a ParallelReader. +

Note that all subreaders are closed if this ParallelReader is closed.

+

+
+ + Construct a ParallelReader. + indicates whether the subreaders should be closed + when this ParallelReader is closed + + + + Add an IndexReader. + IOException if there is a low-level IO error + + + Add an IndexReader whose stored fields will not be returned. This can + accelerate search when stored fields are only needed from a subset of + the IndexReaders. + + + IllegalArgumentException if not all indexes contain the same number + of documents + + IllegalArgumentException if not all indexes have the same value + of {@link IndexReader#MaxDoc()} + + IOException if there is a low-level IO error + + + Tries to reopen the subreaders. +
+ If one or more subreaders could be re-opened (i. e. subReader.reopen() + returned a new instance != subReader), then a new ParallelReader instance + is returned, otherwise this instance is returned. +

+ A re-opened instance might share one or more subreaders with the old + instance. Index modification operations result in undefined behavior + when performed before the old instance is closed. + (see {@link IndexReader#Reopen()}). +

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Checks recursively if all subreaders are up to date. + + + Checks recursively if all subindexes are optimized + + + Not implemented. + UnsupportedOperationException + + + + + + + Writes norms. Each thread X field accumulates the norms + for the doc/fields it saw, then the flush method below + merges all of these together into a single _X.nrm file. + + + + Produce _X.nrm if any document had a field with norms + not disabled + + + +

This class implements a {@link MergePolicy} that tries + to merge segments into levels of exponentially + increasing size, where each level has fewer segments than + the value of the merge factor. Whenever extra segments + (beyond the merge factor upper bound) are encountered, + all segments within the level are merged. You can get or + set the merge factor using {@link #GetMergeFactor()} and + {@link #SetMergeFactor(int)} respectively.

+ +

This class is abstract and requires a subclass to + define the {@link #size} method which specifies how a + segment's size is determined. {@link LogDocMergePolicy} + is one subclass that measures size by document count in + the segment. {@link LogByteSizeMergePolicy} is another + subclass that measures size as the total byte size of the + file(s) for the segment.

+

+
+ +

Expert: a MergePolicy determines the sequence of + primitive merge operations to be used for overall merge + and optimize operations.

+ +

Whenever the segments in an index have been altered by + {@link IndexWriter}, either the addition of a newly + flushed segment, addition of many segments from + addIndexes* calls, or a previous merge that may now need + to cascade, {@link IndexWriter} invokes {@link + #findMerges} to give the MergePolicy a chance to pick + merges that are now required. This method returns a + {@link MergeSpecification} instance describing the set of + merges that should be done, or null if no merges are + necessary. When IndexWriter.optimize is called, it calls + {@link #findMergesForOptimize} and the MergePolicy should + then return the necessary merges.

+ +

Note that the policy can return more than one merge at + a time. In this case, if the writer is using {@link + SerialMergeScheduler}, the merges will be run + sequentially but if it is using {@link + ConcurrentMergeScheduler} they will be run concurrently.

+ +

The default MergePolicy is {@link + LogByteSizeMergePolicy}.

+ +

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+ +

NOTE: This class typically requires access to + package-private APIs (e.g. SegmentInfos) to do its job; + if you implement your own MergePolicy, you'll need to put + it in package Lucene.Net.Index in order to use + these APIs. +

+
+ + Determine what set of merge operations are now necessary on the index. + {@link IndexWriter} calls this whenever there is a change to the segments. + This call is always synchronized on the {@link IndexWriter} instance so + only one thread at a time will call this method. + + + the total set of segments in the index + + + + Determine what set of merge operations is necessary in order to optimize + the index. {@link IndexWriter} calls this when its + {@link IndexWriter#Optimize()} method is called. This call is always + synchronized on the {@link IndexWriter} instance so only one thread at a + time will call this method. + + + the total set of segments in the index + + requested maximum number of segments in the index (currently this + is always 1) + + contains the specific SegmentInfo instances that must be merged + away. This may be a subset of all SegmentInfos. + + + + Determine what set of merge operations is necessary in order to expunge all + deletes from the index. + + + the total set of segments in the index + + + + Release all resources for the policy. + + + Returns true if a newly flushed (not from merge) + segment should use the compound file format. + + + + Returns true if the doc store files should use the + compound file format. + + + + OneMerge provides the information necessary to perform + an individual primitive merge operation, resulting in + a single new segment. The merge spec includes the + subset of segments to be merged as well as whether the + new segment should use the compound file format. + + + + Record that an exception occurred while executing + this merge + + + + Retrieve previous exception set by {@link + #setException}. + + + + Mark this merge as aborted. If this is called + before the merge is committed then the merge will + not be committed. + + + + Returns true if this merge was aborted. + + + A MergeSpecification instance provides the information + necessary to perform multiple merges. It simply + contains a list of {@link OneMerge} instances. + + + + The subset of segments to be included in the primitive merge. + + + Exception thrown if there are any problems while + executing a merge. + + + + + Use {@link #MergePolicy.MergeException(String,Directory)} instead + + + + + Use {@link #MergePolicy.MergeException(Throwable,Directory)} instead + + + + Returns the {@link Directory} of the index that hit + the exception. + + + + Defines the allowed range of log(size) for each + level. A level is computed by taking the max segment + log size, minus LEVEL_LOG_SPAN, and finding all + segments falling within that range. + + + + Default merge factor, which is how many segments are + merged at a time + + + + Default maximum segment size. A segment of this size + + + + +

Returns the number of segments that are merged at + once and also controls the total number of segments + allowed to accumulate in the index.

+

+
+ + Determines how often segment indices are merged by + addDocument(). With smaller values, less RAM is used + while indexing, and searches on unoptimized indices are + faster, but indexing speed is slower. With larger + values, more RAM is used during indexing, and while + searches on unoptimized indices are slower, indexing is + faster. Thus larger values (> 10) are best for batch + index creation, and smaller values (< 10) for indices + that are interactively maintained. + + + + Sets whether compound file format should be used for + newly flushed and newly merged segments. + + + + Returns true if newly flushed and newly merge segments + + + + + Sets whether compound file format should be used for + newly flushed and newly merged doc store + segment files (term vectors and stored fields). + + + + Returns true if newly flushed and newly merge doc + store segment files (term vectors and stored fields) + + + + + + Sets whether the segment size should be calibrated by + the number of deletes when choosing segments for merge. + + + + Returns true if the segment size should be calibrated + by the number of deletes when choosing segments for merge. + + + + Returns true if this single info is optimized (has no + pending norms or deletes, is in the same dir as the + writer, and matches the current compound file setting + + + + Returns the merges necessary to optimize the index. + This merge policy defines "optimized" to mean only one + segment in the index, where that segment has no + deletions pending nor separate norms, and it is in + compound file format if the current useCompoundFile + setting is true. This method returns multiple merges + (mergeFactor at a time) so the {@link MergeScheduler} + in use may make use of concurrency. + + + + Finds merges necessary to expunge all deletes from the + index. We simply merge adjacent segments that have + deletes, up to mergeFactor at a time. + + + + Checks if any merges are now necessary and returns a + {@link MergePolicy.MergeSpecification} if so. A merge + is necessary when there are more than {@link + #setMergeFactor} segments at a given level. When + multiple levels have too many segments, this method + will return multiple merges, allowing the {@link + MergeScheduler} to use concurrency. + + + +

Determines the largest segment (measured by + document count) that may be merged with other segments. + Small values (e.g., less than 10,000) are best for + interactive indexing, as this limits the length of + pauses while indexing to a few seconds. Larger values + are best for batched indexing and speedier + searches.

+ +

The default value is {@link Integer#MAX_VALUE}.

+ +

The default merge policy ({@link + LogByteSizeMergePolicy}) also allows you to set this + limit by net size (in MB) of the segment, using {@link + LogByteSizeMergePolicy#setMaxMergeMB}.

+

+
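+ One common way to tune these knobs in C#, assuming this release's IndexWriter convenience setters (dir and analyzer are placeholders):
+            <code>
+            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
+            writer.SetMergeFactor(20);        // larger values favor batch indexing
+            writer.SetMaxMergeDocs(100000);   // never merge segments beyond this document count
+            </code>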
+ + Returns the largest segment (measured by document + count) that may be merged with other segments. + + + + + + Bulk write a contiguous series of documents. The + lengths array is the length (in bytes) of each raw + document. The stream IndexInput is the + fieldsStream from which we should bulk-copy all + bytes. + + + + This class accepts multiple added documents and directly + writes a single segment file. It does this more + efficiently than creating a single segment per document + (with DocumentWriter) and doing standard merges on those + segments. + + Each added document is passed to the {@link DocConsumer}, + which in turn processes the document and interacts with + other consumers in the indexing chain. Certain + consumers, like {@link StoredFieldsWriter} and {@link + TermVectorsTermsWriter}, digest a document and + immediately write bytes to the "doc store" files (ie, + they do not consume RAM per document, except while they + are processing the document). + + Other consumers, eg {@link FreqProxTermsWriter} and + {@link NormsWriter}, buffer bytes in RAM and flush only + when a new segment is produced. + Once we have used our allowed RAM buffer, or the number + of added docs is large enough (in the case we are + flushing by doc count instead of RAM usage), we create a + real segment and flush it to the Directory. + + Threads: + + Multiple threads are allowed into addDocument at once. + There is an initial synchronized call to getThreadState + which allocates a ThreadState for this thread. The same + thread will get the same ThreadState over time (thread + affinity) so that if there are consistent patterns (for + example each thread is indexing a different content + source) then we make better use of RAM. Then + processDocument is called on that ThreadState without + synchronization (most of the "heavy lifting" is in this + call). Finally the synchronized "finishDocument" is + called to flush changes to the directory. + + When flush is called by IndexWriter, or, we flush + internally when autoCommit=false, we forcefully idle all + threads and flush only once they are all idle. This + means you can call flush with a given thread even while + other threads are actively adding/deleting documents. + + + Exceptions: + + Because this class directly updates in-memory posting + lists, and flushes stored fields and term vectors + directly to files in the directory, there are certain + limited times when an exception can corrupt this state. + For example, a disk full while flushing stored fields + leaves this file in a corrupt state. Or, an OOM + exception while appending to the in-memory posting lists + can corrupt that posting list. We call such exceptions + "aborting exceptions". In these cases we must call + abort() to discard all docs added since the last flush. + + All other exceptions ("non-aborting exceptions") can + still partially update the index structures. These + updates are consistent, but, they represent only a part + of the document seen up until the exception was hit. + When this happens, we immediately mark the document as + deleted so that the document is always atomically ("all + or none") added to the index. + + + + Returns true if any of the fields in the current + buffered docs have omitTermFreqAndPositions==false + + + + If non-null, various details of indexing are printed + here. + + + + Set how much RAM we can use before flushing. + + + Set max buffered docs, which means we will flush by + doc count instead of by RAM usage. 
+ + + + Get the current segment name we are writing. + + + Returns how many docs are currently buffered in RAM. + + + Returns the current doc store segment we are writing + to. This will be the same as segment when autoCommit + is true. + + + Returns the doc offset into the shared doc store for + the current buffered docs. + + + + Closes the currently open doc stores and returns the doc + store segment name. This returns null if there are + no buffered documents. + + + + Called if we hit an exception at a bad time (when + updating the index files) and must discard all + currently buffered docs. This resets our state, + discarding any docs added since the last flush. + + + + Reset after a flush + + + Flush all pending docs to a new segment + + + Build the compound file for the segment we just flushed + + + Sets flushPending if it is not already set and returns + whether it was set. This is used by IndexWriter to + trigger a single flush even when multiple threads are + trying to do so. + + + + Returns a free (idle) ThreadState that may be used for + indexing this one document. This call also pauses if a + flush is pending. If delTerm is non-null then we + buffer this deleted term after the thread state has + been acquired. + + + + Returns true if the caller (IndexWriter) should now + flush. + + + + Called whenever a merge has completed and the merged segments had deletions + + + Does the synchronized work to finish/flush the + inverted document. + + + + The IndexingChain must define the {@link #GetChain(DocumentsWriter)} method + which returns the DocConsumer that the DocumentsWriter calls to process the + documents. + + + + Consumer returns this on each doc. This holds any + state that must be flushed synchronized "in docID + order". We gather these and flush them in order. + + + + Loader for text files that represent a list of stopwords. + + + + $Id: WordlistLoader.java 706342 2008-10-20 17:19:29Z gsingers $ + + + + Loads a text file and adds every line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the file should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + + File containing the wordlist + + A HashSet with the file's words + + + + Loads a text file and adds every non-comment line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the file should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + + File containing the wordlist + + The comment string to ignore + + A HashSet with the file's words + + + + Reads lines from a Reader and adds every line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the Reader should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + + Reader containing the wordlist + + A HashSet with the reader's words + + + + Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting + leading and trailing whitespace). Every line of the Reader should contain only + one word. The words need to be in lowercase if you make use of an + Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + + + Reader containing the wordlist + + The string representing a comment. + + A HashSet with the reader's words + + + + Reads a stem dictionary. 
Each line contains: +
word\tstem
+ (i.e. two tab separated words) + +
+ stem dictionary that overrules the stemming algorithm + + IOException +
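+ A hedged C# sketch of loading a word set from a file (the file name and the "#" comment marker are placeholders; the concrete collection type returned depends on the port version):
+            <code>
+            // One stop word per line; lines starting with "#" are ignored as comments.
+            var stopWords = WordlistLoader.GetWordSet(new System.IO.FileInfo("stopwords.txt"), "#");
+            </code>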
+ + Removes stop words from a token stream. + + + Construct a token stream filtering the given input. + Use {@link #StopFilter(boolean, TokenStream, String[])} instead + + + + Construct a token stream filtering the given input. + true if token positions should record the removed stop words + + input TokenStream + + array of stop words + + Use {@link #StopFilter(boolean, TokenStream, Set)} instead. + + + + Constructs a filter which removes words from the input + TokenStream that are named in the array of words. + + Use {@link #StopFilter(boolean, TokenStream, String[], boolean)} instead + + + + Constructs a filter which removes words from the input + TokenStream that are named in the array of words. + + true if token positions should record the removed stop words + + input TokenStream + + array of stop words + + true if case is ignored + + Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead. + + + + Construct a token stream filtering the given input. + If stopWords is an instance of {@link CharArraySet} (true if + makeStopSet() was used to construct the set) it will be directly used + and ignoreCase will be ignored since CharArraySet + directly controls case sensitivity. +

+ If stopWords is not an instance of {@link CharArraySet}, + a new CharArraySet will be constructed and ignoreCase will be + used to specify the case sensitivity of that set. + +

+ + + The set of Stop Words. + + -Ignore case when stopping. + + Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead + +
+ + Construct a token stream filtering the given input. + If stopWords is an instance of {@link CharArraySet} (true if + makeStopSet() was used to construct the set) it will be directly used + and ignoreCase will be ignored since CharArraySet + directly controls case sensitivity. +

+ If stopWords is not an instance of {@link CharArraySet}, + a new CharArraySet will be constructed and ignoreCase will be + used to specify the case sensitivity of that set. + +

+ true if token positions should record the removed stop words + + Input TokenStream + + The set of Stop Words. + + -Ignore case when stopping. + +
+ + Constructs a filter which removes words from the input + TokenStream that are named in the Set. + + + + + Use {@link #StopFilter(boolean, TokenStream, Set)} instead + + + + Constructs a filter which removes words from the input + TokenStream that are named in the Set. + + + true if token positions should record the removed stop words + + Input stream + + The set of Stop Words. + + + + + + Builds a Set from an array of stop words, + appropriate for passing into the StopFilter constructor. + This permits this stopWords construction to be cached once when + an Analyzer is constructed. + + + passing false to ignoreCase + + + + Builds a Set from an array of stop words, + appropriate for passing into the StopFilter constructor. + This permits this stopWords construction to be cached once when + an Analyzer is constructed. + + + passing false to ignoreCase + + + + + An array of stopwords + + If true, all words are lower cased first. + + a Set containing the words + + + + + A List of Strings representing the stopwords + + if true, all words are lower cased first + + A Set containing the words + + + + Returns the next input Token whose term() is not a stop word. + + + + + Please specify this when you create the StopFilter + + + + Returns version-dependent default for enablePositionIncrements. Analyzers + that embed StopFilter use this method when creating the StopFilter. Prior + to 2.9, this returns {@link #getEnablePositionIncrementsDefault}. On 2.9 + or later, it returns true. + + + + Set the default position increments behavior of every StopFilter created + from now on. +

+ Note: behavior of a single StopFilter instance can be modified with + {@link #SetEnablePositionIncrements(boolean)}. This static method allows + control over behavior of classes using StopFilters internally, for + example {@link Lucene.Net.Analysis.Standard.StandardAnalyzer + StandardAnalyzer} if used with the no-arg ctor. +

+ Default : false. + +

+ + + Please specify this when you create the StopFilter + +
+ + + + + + If true, this StopFilter will preserve + positions of the incoming tokens (ie, accumulate and + set position increments of the removed stop tokens). + Generally, true is best as it does not + lose information (positions of the original tokens) + during indexing. + +

When set, when a token is stopped + (omitted), the position increment of the following + token is incremented. + +

NOTE: be sure to also + set {@link QueryParser#setEnablePositionIncrements} if + you use QueryParser to create queries. +

+
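+ A minimal C# sketch of wiring a StopFilter with position increments enabled (assumes the Set-based overload described above; the exact stop-word collection type depends on the port version):
+            <code>
+            TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader("a quick brown fox"));
+            // true: keep position increments so phrase queries still see the gaps left by removed stop words
+            stream = new StopFilter(true, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            </code>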
+ + Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and + {@link StopFilter}. + + +

+ You must specify the required {@link Version} compatibility when creating + StopAnalyzer: +

  • As of 2.9, position increments are preserved
+
+
+ + An Analyzer builds TokenStreams, which analyze text. It thus represents a + policy for extracting index terms from text. +

+ Typical implementations first build a Tokenizer, which breaks the stream of + characters from the Reader into raw Tokens. One or more TokenFilters may + then be applied to the output of the Tokenizer. +

+
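+ For illustration, a minimal C# sketch of turning text into a TokenStream through an Analyzer (the analyzer choice, field name, and Version constant are placeholders):
+            <code>
+            Analyzer analyzer = new StopAnalyzer(Version.LUCENE_29);
+            // Letters are tokenized and lower-cased; stop words such as "the" are removed.
+            TokenStream tokens = analyzer.TokenStream("body", new System.IO.StringReader("The Quick Brown Fox"));
+            </code>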
+ + Creates a TokenStream which tokenizes all the text in the provided + Reader. Must be able to handle null field name for + backward compatibility. + + + + Creates a TokenStream that is allowed to be re-used + from the previous time that the same thread called + this method. Callers that do not need to use more + than one TokenStream at the same time from this + analyzer should use this method for better + performance. + + + + Used by Analyzers that implement reusableTokenStream + to retrieve previously saved TokenStreams for re-use + by the same thread. + + + + Used by Analyzers that implement reusableTokenStream + to save a TokenStream for later re-use by the same + thread. + + + + This is only present to preserve + back-compat of classes that subclass a core analyzer + and override tokenStream but not reusableTokenStream + + + + Invoked before indexing a Fieldable instance if + terms have already been added to that field. This allows custom + analyzers to place an automatic position increment gap between + Fieldable instances using the same field name. The default value + position increment gap is 0. With a 0 position increment gap and + the typical default token position increment of 1, all terms in a field, + including across Fieldable instances, are in successive positions, allowing + exact PhraseQuery matches, for instance, across Fieldable instance boundaries. + + + Fieldable name being indexed. + + position increment gap, added to the next token emitted from {@link #TokenStream(String,Reader)} + + + + Just like {@link #getPositionIncrementGap}, except for + Token offsets instead. By default this returns 1 for + tokenized fields and, as if the fields were joined + with an extra space character, and 0 for un-tokenized + fields. This method is only called if the field + produced at least one token for indexing. + + + the field just indexed + + offset gap, added to the next token emitted from {@link #TokenStream(String,Reader)} + + + + Frees persistent resources used by this Analyzer + + + An array containing some common English words that are not usually useful + for searching. + + Use {@link #ENGLISH_STOP_WORDS_SET} instead + + + + An unmodifiable set containing some common English words that are not usually useful + for searching. + + + + Builds an analyzer which removes words in + ENGLISH_STOP_WORDS. + + Use {@link #StopAnalyzer(Version)} instead + + + + Builds an analyzer which removes words in ENGLISH_STOP_WORDS. + + + Builds an analyzer which removes words in + ENGLISH_STOP_WORDS. + + + See {@link StopFilter#SetEnablePositionIncrements} + + Use {@link #StopAnalyzer(Version)} instead + + + + Builds an analyzer with the stop words from the given set. + Use {@link #StopAnalyzer(Version, Set)} instead + + + + Builds an analyzer with the stop words from the given set. + + + Builds an analyzer with the stop words from the given set. + Set of stop words + + + See {@link StopFilter#SetEnablePositionIncrements} + + Use {@link #StopAnalyzer(Version, Set)} instead + + + + Builds an analyzer which removes words in the provided array. + Use {@link #StopAnalyzer(Set, boolean)} instead + + Use {@link #StopAnalyzer(Version, Set)} instead + + + + Builds an analyzer which removes words in the provided array. + Array of stop words + + + See {@link StopFilter#SetEnablePositionIncrements} + + Use {@link #StopAnalyzer(Version, Set)} instead + + + + Builds an analyzer with the stop words from the given file. 
+ + + Use {@link #StopAnalyzer(Version, File)} instead + + + + Builds an analyzer with the stop words from the given file. + + + File to load stop words from + + + See {@link StopFilter#SetEnablePositionIncrements} + + Use {@link #StopAnalyzer(Version, File)} instead + + + + Builds an analyzer with the stop words from the given file. + + + + + See
above + + File to load stop words from + + + + Builds an analyzer with the stop words from the given reader. + + + Use {@link #StopAnalyzer(Version, Reader)} instead + + + + Builds an analyzer with the stop words from the given reader. + + + Reader to load stop words from + + + See {@link StopFilter#SetEnablePositionIncrements} + + Use {@link #StopAnalyzer(Version, Reader)} instead + + + + Builds an analyzer with the stop words from the given reader. + + + See above + + Reader to load stop words from + + + + Filters LowerCaseTokenizer with StopFilter. + + + Filters LowerCaseTokenizer with StopFilter. + + + Use by certain classes to match version compatibility + across releases of Lucene. +

+ WARNING: When changing the version parameter + that you supply to components in Lucene, do not simply + change the version at search-time, but instead also adjust + your indexing code to match, and re-index. +

+
+ + +

WARNING: if you use this setting, and then + upgrade to a newer release of Lucene, sizable changes + may happen. If precise back compatibility is important + then you should instead explicitly specify an actual + version. + If you use this constant then you may need to + re-index all of your documents when upgrading + Lucene, as the way text is indexed may have changed. + Additionally, you may need to re-test your entire + application to ensure it behaves as expected, as + some defaults may have changed and may break functionality + in your application. +

+
+ + Match settings and bugs in Lucene's 2.0 release. + + + Match settings and bugs in Lucene's 2.1 release. + + + Match settings and bugs in Lucene's 2.2 release. + + + Match settings and bugs in Lucene's 2.3 release. + + + Match settings and bugs in Lucene's 2.4 release. + + + + Stores and iterates on sorted integers in compressed form in RAM.
+ The code for compressing the differences between ascending integers was + borrowed from {@link Lucene.Net.Store.IndexInput} and + {@link Lucene.Net.Store.IndexOutput}. +

+ NOTE: this class assumes the stored integers are doc Ids (hence why it + extends {@link DocIdSet}). Therefore its {@link #Iterator()} assumes {@link + DocIdSetIterator#NO_MORE_DOCS} can be used as a sentinel. If you intend to use + this value, then make sure it's not used during search flow. +

+
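+ A minimal C# sketch of building and iterating a SortedVIntList (the input values are illustrative and must be sorted, non-negative doc ids):
+            <code>
+            SortedVIntList list = new SortedVIntList(new int[] { 3, 17, 254 });
+            DocIdSetIterator it = list.Iterator();
+            while (it.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+            {
+                int docId = it.DocID();
+            }
+            </code>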
+ + When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, + a SortedVIntList representing the index numbers of the set bits + will be smaller than that BitSet. + + + + Create a SortedVIntList from all elements of an array of integers. + + + A sorted array of non negative integers. + + + + Create a SortedVIntList from an array of integers. + An array of sorted non negative integers. + + The number of integers to be used from the array. + + + + Create a SortedVIntList from a BitSet. + A bit set representing a set of integers. + + + + Create a SortedVIntList from an OpenBitSet. + A bit set representing a set of integers. + + + + Create a SortedVIntList. + An iterator providing document numbers as a set of integers. + This DocIdSetIterator is iterated completely when this constructor + is called and it must provide the integers in non + decreasing order. + + + + The total number of sorted integers. + + + + The size of the byte array storing the compressed sorted integers. + + + + This DocIdSet implementation is cacheable. + + + An iterator over the sorted integers. + + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + A ScorerDocQueue maintains a partial ordering of its Scorers such that the + least Scorer can always be found in constant time. Put()'s and pop()'s + require log(size) time. The ordering is by Scorer.doc(). + + + + Create a ScorerDocQueue with a maximum size. + + + Adds a Scorer to a ScorerDocQueue in log(size) time. + If one tries to add more Scorers than maxSize + a RuntimeException (ArrayIndexOutOfBound) is thrown. + + + + Adds a Scorer to the ScorerDocQueue in log(size) time if either + the ScorerDocQueue is not full, or not lessThan(scorer, top()). + + + + true if scorer is added, false otherwise. + + + + Returns the least Scorer of the ScorerDocQueue in constant time. + Should not be used when the queue is empty. + + + + Returns document number of the least Scorer of the ScorerDocQueue + in constant time. + Should not be used when the queue is empty. + + + + Removes and returns the least scorer of the ScorerDocQueue in log(size) + time. + Should not be used when the queue is empty. + + + + Removes the least scorer of the ScorerDocQueue in log(size) time. + Should not be used when the queue is empty. + + + + Should be called when the scorer at top changes doc() value. + Still log(n) worst case, but it's at least twice as fast to
+            { pq.top().change(); pq.adjustTop(); }
+            
instead of
+            { o = pq.pop(); o.change(); pq.push(o); }
+            
+
+
+ + Returns the number of scorers currently stored in the ScorerDocQueue. + + + Removes all entries from the ScorerDocQueue. + + + An "open" BitSet implementation that allows direct access to the array of words + storing the bits. +

+ Unlike java.util.bitset, the fact that bits are packed into an array of longs + is part of the interface. This allows efficient implementation of other algorithms + by someone other than the author. It also allows one to efficiently implement + alternate serialization or interchange formats. +

+ OpenBitSet is faster than java.util.BitSet in most operations + and *much* faster at calculating cardinality of sets and results of set operations. + It can also handle sets of larger cardinality (up to 64 * 2**32-1) +

+ The goals of OpenBitSet are the fastest implementation possible, and + maximum code reuse. Extra safety and encapsulation + may always be built on top, but if that's built in, the cost can never be removed (and + hence people re-implement their own version in order to get better performance). + If you want a "safe", totally encapsulated (and slower and limited) BitSet + class, use java.util.BitSet. +

+

Performance Results

+ + Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M +
BitSet size = 1,000,000 +
Results are java.util.BitSet time divided by OpenBitSet time.
             cardinality  intersect_count  union  nextSetBit  get   iterator
  50% full   3.36         3.96             1.44   1.46        1.99  1.58
  1% full    3.31         3.90             -      1.04        -     0.99
+
+ Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M +
BitSet size = 1,000,000 +
Results are java.util.BitSet time divided by OpenBitSet time.
             cardinality  intersect_count  union  nextSetBit  get   iterator
  50% full   2.50         3.50             1.00   1.03        1.12  1.25
  1% full    2.51         3.49             -      1.00        -     1.02
+
+ $Id$ + +
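+ For example, a minimal C# sketch of basic OpenBitSet usage (the capacity and bit indexes are illustrative):
+            <code>
+            OpenBitSet bits = new OpenBitSet(1024);
+            bits.Set(3);
+            bits.Set(64);                        // bit 64 lives in the second underlying long
+            long setCount = bits.Cardinality();  // 2
+            bool isSet = bits.Get(64);           // true
+            </code>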
+ + Constructs an OpenBitSet large enough to hold numBits. + + + + + + + Constructs an OpenBitSet from an existing long[]. +
+ The first 64 bits are in long[0], + with bit index 0 at the least significant bit, and bit index 63 at the most significant. + Given a bit index, + the word containing it is long[index/64], and it is at bit number index%64 within that word. +

+ numWords are the number of elements in the array that contain + set bits (non-zero longs). + numWords should be <= bits.length, and + any existing words in the array at position >= numWords should be zero. + +

+
+ + This DocIdSet implementation is cacheable. + + + Returns the current capacity in bits (1 greater than the index of the last bit) + + + Returns the current capacity of this set. Included for + compatibility. This is *not* equal to {@link #cardinality} + + + + Returns true if there are no set bits + + + Expert: returns the long[] storing the bits + + + Expert: sets a new long[] to use as the bit storage + + + Expert: gets the number of longs in the array that are in use + + + Expert: sets the number of longs in the array that are in use + + + Returns true or false for the specified bit index. + + + Returns true or false for the specified bit index. + The index should be less than the OpenBitSet size + + + + Returns true or false for the specified bit index + + + Returns true or false for the specified bit index. + The index should be less than the OpenBitSet size. + + + + returns 1 if the bit is set, 0 if not. + The index should be less than the OpenBitSet size + + + + sets a bit, expanding the set size if necessary + + + Sets the bit at the specified index. + The index should be less than the OpenBitSet size. + + + + Sets the bit at the specified index. + The index should be less than the OpenBitSet size. + + + + Sets a range of bits, expanding the set size if necessary + + + lower index + + one-past the last bit to set + + + + clears a bit. + The index should be less than the OpenBitSet size. + + + + clears a bit. + The index should be less than the OpenBitSet size. + + + + clears a bit, allowing access beyond the current set size without changing the size. + + + Clears a range of bits. Clearing past the end does not change the size of the set. + + + lower index + + one-past the last bit to clear + + + + Clears a range of bits. Clearing past the end does not change the size of the set. + + + lower index + + one-past the last bit to clear + + + + Sets a bit and returns the previous value. + The index should be less than the OpenBitSet size. + + + + Sets a bit and returns the previous value. + The index should be less than the OpenBitSet size. + + + + flips a bit. + The index should be less than the OpenBitSet size. + + + + flips a bit. + The index should be less than the OpenBitSet size. + + + + flips a bit, expanding the set size if necessary + + + flips a bit and returns the resulting bit value. + The index should be less than the OpenBitSet size. + + + + flips a bit and returns the resulting bit value. + The index should be less than the OpenBitSet size. + + + + Flips a range of bits, expanding the set size if necessary + + + lower index + + one-past the last bit to flip + + + + the number of set bits + + + + Returns the popcount or cardinality of the intersection of the two sets. + Neither set is modified. + + + + Returns the popcount or cardinality of the union of the two sets. + Neither set is modified. + + + + Returns the popcount or cardinality of "a and not b" + or "intersection(a, not(b))". + Neither set is modified. + + + + Returns the popcount or cardinality of the exclusive-or of the two sets. + Neither set is modified. + + + + Returns the index of the first set bit starting at the index specified. + -1 is returned if there are no more set bits. + + + + Returns the index of the first set bit starting at the index specified. + -1 is returned if there are no more set bits. + + + + this = this AND other + + + this = this OR other + + + Remove all elements set in other. 
this = this AND_NOT other + + + this = this XOR other + + + returns true if the sets have any elements in common + + + Expand the long[] with the size given as a number of words (64 bit longs). + getNumWords() is unchanged by this call. + + + + Ensure that the long[] is big enough to hold numBits, expanding it if necessary. + getNumWords() is unchanged by this call. + + + + Lowers numWords, the number of words in use, + by checking for trailing zero words. + + + + returns the number of 64 bit words it would take to hold numBits + + + returns true if both sets have the same bits set + + + Construct an OpenBitSetDISI with its bits set + from the doc ids of the given DocIdSetIterator. + Also give a maximum size one larger than the largest doc id for which a + bit may ever be set on this OpenBitSetDISI. + + + + Construct an OpenBitSetDISI with no bits set, and a given maximum size + one larger than the largest doc id for which a bit may ever be set + on this OpenBitSetDISI. + + + + Perform an inplace OR with the doc ids from a given DocIdSetIterator, + setting the bit for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace AND with the doc ids from a given DocIdSetIterator, + leaving only the bits set for which the doc ids are in common. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace NOT with the doc ids from a given DocIdSetIterator, + clearing all the bits for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + Perform an inplace XOR with the doc ids from a given DocIdSetIterator, + flipping all the bits for each such doc id. + These doc ids should be smaller than the maximum size passed to the + constructor. + + + + This exception is thrown when there is an attempt to + access something that has already been closed. + + + + The {@link TimeLimitingCollector} is used to timeout search requests that + take longer than the maximum allowed search time limit. After this time is + exceeded, the search thread is stopped by throwing a + {@link TimeExceededException}. + + + + Default timer resolution. + + + + + Default for {@link #IsGreedy()}. + + + + + Create a TimeLimitedCollector wrapper over another {@link Collector} with a specified timeout. + the wrapped {@link Collector} + + max time allowed for collecting hits after which {@link TimeExceededException} is thrown + + + + Return the timer resolution. + + + + + Set the timer resolution. + The default timer resolution is 20 milliseconds. + This means that a search required to take no longer than + 800 milliseconds may be stopped after 780 to 820 milliseconds. +
Note that: +
  • Finer (smaller) resolution is more accurate but less efficient.
  • Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds.
  • Setting resolution smaller than the current resolution might take effect only after the current + resolution interval has elapsed. (If the current resolution of 20 milliseconds is changed to 5 milliseconds, + it can take up to 20 milliseconds for the change to take effect.)
+
+
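+ A hedged usage sketch of the collector described here (member names follow the
+ 2.9-era Lucene.Net API; searcher, query and the 1000 ms budget are placeholders):
+
+    var collector = Lucene.Net.Search.TopScoreDocCollector.Create(10, true);
+    var limited = new Lucene.Net.Search.TimeLimitingCollector(collector, 1000);
+    try
+    {
+        searcher.Search(query, limited);
+    }
+    catch (Lucene.Net.Search.TimeLimitingCollector.TimeExceededException)
+    {
+        // Timed out; the wrapped collector still holds the hits gathered so far.
+    }
+    var hits = collector.TopDocs().ScoreDocs;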
+ + Checks if this time limited collector is greedy in collecting the last hit. + A non greedy collector, upon a timeout, would throw a {@link TimeExceededException} + without allowing the wrapped collector to collect current doc. A greedy one would + first allow the wrapped hit collector to collect current doc and only then + throw a {@link TimeExceededException}. + + + + + + Sets whether this time limited collector is greedy. + true to make this time limited greedy + + + + + + Calls {@link Collector#Collect(int)} on the decorated {@link Collector} + unless the allowed time has passed, in which case it throws an exception. + + + TimeExceededException + if the time allowed has exceeded. + + + + TimerThread provides a pseudo-clock service to all searching + threads, so that they can count elapsed time with less overhead + than repeatedly calling System.currentTimeMillis. A single + thread should be created to be used for all searches. + + + + Get the timer value in milliseconds. + + + Thrown when elapsed search time exceeds allowed search time. + + + Returns allowed time (milliseconds). + + + Returns elapsed time (milliseconds). + + + Returns last doc that was collected when the search time exceeded. + + + The interface for search implementations. + +

+ Searchable is the abstract network protocol for searching. Implementations + provide search over a single index, over multiple indices, and over indices + on remote servers. + +

+ Queries, filters and sort criteria are designed to be compact so that they + may be efficiently passed to a remote index, with only the top-scoring hits + being returned, rather than every matching hit. + + NOTE: this interface is kept public for convenience. Since it is not + expected to be implemented directly, it may be changed unexpectedly between + releases. +

+
+ + Lower-level search API. + +

{@link HitCollector#Collect(int,float)} is called for every non-zero + scoring document. +
HitCollector-based access to remote indexes is discouraged. + +

Applications should only use this if they need all of the + matching documents. The high-level search API ({@link + Searcher#Search(Query)}) is usually more efficient, as it skips + non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses + use {@link #Search(Weight, Filter, Collector)} instead. + +
+ + Lower-level search API. + +

+ {@link Collector#Collect(int)} is called for every document.
+ Collector-based access to remote indexes is discouraged. + +

+ Applications should only use this if they need all of the matching + documents. The high-level search API ({@link Searcher#Search(Query)}) is + usually more efficient, as it skips non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses +
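+ To make the lower-level contract above concrete, a sketch of a collector that simply
+ records every matching doc id (shapes follow the 2.9-era Collector base class; later
+ releases expose AcceptsDocsOutOfOrder as a property):
+
+    class AllDocsCollector : Lucene.Net.Search.Collector
+    {
+        private readonly System.Collections.Generic.List<int> docs = new System.Collections.Generic.List<int>();
+        private int docBase;
+
+        public override void SetScorer(Lucene.Net.Search.Scorer scorer) { }  // scores not needed
+        public override void SetNextReader(Lucene.Net.Index.IndexReader reader, int docBase) { this.docBase = docBase; }
+        public override void Collect(int doc) { docs.Add(docBase + doc); }   // called once per matching doc
+        public override bool AcceptsDocsOutOfOrder() { return true; }
+    }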
+ + Frees resources associated with this Searcher. + Be careful not to call this method while you are still using objects + like {@link Hits}. + + + + Expert: Returns the number of documents containing term. + Called by search code to compute term weights. + + + + + + Expert: For each term in the terms array, calculates the number of + documents containing term. Returns an array with these + document frequencies. Used to minimize number of remote calls. + + + + Expert: Returns one greater than the largest possible document number. + Called by search code to compute term weights. + + + + + + Expert: Low-level search implementation. Finds the top n + hits for query, applying filter if non-null. + +

Called by {@link Hits}. + +

Applications should usually call {@link Searcher#Search(Query)} or + {@link Searcher#Search(Query,Filter)} instead. +

+ BooleanQuery.TooManyClauses +
+ + Expert: Returns the stored fields of document i. + Called by {@link HitCollector} implementations. + + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Get the {@link Lucene.Net.Documents.Document} at the nth position. The {@link Lucene.Net.Documents.FieldSelector} + may be used to determine what {@link Lucene.Net.Documents.Field}s to load and how they should be loaded. + + NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the lazy {@link Lucene.Net.Documents.Field} is + loaded an exception may be thrown. If you want the value of a lazy {@link Lucene.Net.Documents.Field} to be available after closing you must + explicitly load it or fetch the Document again with a new loader. + + + + Get the document at the nth position + + The {@link Lucene.Net.Documents.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. + + The stored fields of the {@link Lucene.Net.Documents.Document} at the nth position + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + + + + + + + + + + + + + Expert: called to re-write queries into primitive queries. + BooleanQuery.TooManyClauses + + + Expert: low-level implementation method + Returns an Explanation that describes how doc scored against + weight. + +

This is intended to be used in developing Similarity implementations, + and, for good performance, should not be displayed with every hit. + Computing an explanation is as expensive as executing the query over the + entire index. +

Applications should call {@link Searcher#Explain(Query, int)}. +

+ BooleanQuery.TooManyClauses +
+ + Expert: Low-level search implementation with arbitrary sorting. Finds + the top n hits for query, applying + filter if non-null, and sorting the hits by the criteria in + sort. + +

Applications should usually call + {@link Searcher#Search(Query,Filter,int,Sort)} instead. + +

+ BooleanQuery.TooManyClauses +
+ + Constrains search results to only match those which also match a provided + query. Results are cached, so that searches after the first on the same + index using this filter are much faster. + + + $Id: QueryFilter.java 528298 2007-04-13 00:59:28Z hossman $ + + use a CachingWrapperFilter with QueryWrapperFilter + + + + Wraps another filter's result and caches it. The purpose is to allow + filters to simply filter, and then wrap with this class to add caching. + + + + A transient Filter cache. + + + Filter to cache results of + + + + Use {@link #GetDocIdSet(IndexReader)} instead. + + + + Provide the DocIdSet to be cached, using the DocIdSet provided + by the wrapped Filter. + This implementation returns the given DocIdSet. + + + + Constructs a filter which only matches documents matching + query. + + + + Wrapper used by {@link HitIterator} to provide a lazily loaded hit + from {@link Hits}. + + + Use {@link TopScoreDocCollector} and {@link TopDocs} instead. Hits will be removed in Lucene 3.0. + + + + Constructed from {@link HitIterator} + Hits returned from a search + + Hit index in Hits + + + + Returns document for this hit. + + + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns score for this hit. + + + + + + + Returns id for this hit. + + + + + + + Returns the boost factor for this hit on any field of the underlying document. + + + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Returns the string value of the field with the given name if any exist in + this document, or null. If multiple fields exist with this name, this + method returns the first value added. If only binary fields with this name + exist, returns null. + + + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Prints the parameters to be used to discover the promised result. + + + Subclass of FilteredTermEnum for enumerating all terms that are similiar + to the specified filter term. + +

Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+
+ + Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ + + + + IOException + + +
+ + Creates a FuzzyTermEnum with an empty prefix. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ + + + + + + IOException + + +
+ + Constructor for enumeration of all terms from specified reader which share a prefix of + length prefixLength with term and which have a fuzzy similarity > + minSimilarity. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. + +

+ Delivers terms. + + Pattern term. + + Minimum required similarity for terms from the reader. Default value is 0.5f. + + Length of required common prefix. Default value is 0. + + IOException +
+ + The termCompare method in FuzzyTermEnum uses Levenshtein distance to + calculate the distance between the given term and the comparing term. + + + + Finds and returns the smallest of three integers + + +

Similarity returns a number that is 1.0f or less (including negative numbers) + based on how similar the Term is compared to a target term. It returns + exactly 0.0f when +

+            editDistance < maximumEditDistance
+ Otherwise it returns: +
+            1 - (editDistance / length)
+ where length is the length of the shorter of the two terms (text or target) plus the length of the + identical prefix, and editDistance is the Levenshtein distance for + the two words.

+ +

Embedded within this algorithm is a fail-fast Levenshtein distance + algorithm. The fail-fast algorithm differs from the standard Levenshtein + distance algorithm in that it is aborted if it is discovered that the + minimum distance between the words is greater than some threshold. + +

To calculate the maximum distance threshold we use the following formula: +

+            (1 - minimumSimilarity) * length
+ where length is the shortest term including any prefix that is not part of the + similarity comparison. This formula was derived by solving for what maximum value + of distance returns false for the following statements: +
+            similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+            return (similarity > minimumSimilarity);
+ where distance is the Levenshtein distance for the two words. +

+

Levenshtein distance (also known as edit distance) is a measure of similarity + between two strings where the distance is measured as the number of character + deletions, insertions or substitutions required to transform one string to + the other string. +

+ the target word or phrase + + the similarity, 0.0 or less indicates that it matches less than the required + threshold and 1.0 indicates that the text and target are identical + +
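+ A worked example of the formulas above (illustrative numbers, not from the original text):
+ comparing "lucene" against "lucent" with prefixLength = 0 gives
+            editDistance = 1,  length = min(6, 6) = 6
+            similarity   = 1 - (1 / 6) ≈ 0.83
+ which passes the default minimumSimilarity of 0.5; the matching fail-fast threshold is
+            (1 - 0.5) * 6 = 3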
+ + Grow the second dimension of the array, so that we can calculate the + Levenshtein difference. + + + + The max Distance is the maximum Levenshtein distance for the text + compared to some other value that results in score that is + better than the minimum similarity. + + the length of the "other value" + + the maximum levenshtein distance that we care about + + + + This interface is obsolete, use {@link FieldCache} instead. + + + Use {@link FieldCache}, this will be removed in Lucene 3.0 + + + + + Use {@link FieldCache#DEFAULT}; this will be removed in Lucene 3.0 + + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as a single byte and returns an array + of size reader.maxDoc() of the value each document + has in the given field. + + Used to get field values. + + Which field contains the single byte values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as bytes and returns an array of + size reader.maxDoc() of the value each document has in the + given field. + + Used to get field values. + + Which field contains the bytes. + + Computes byte for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as shorts and returns an array + of size reader.maxDoc() of the value each document + has in the given field. + + Used to get field values. + + Which field contains the shorts. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as shorts and returns an array of + size reader.maxDoc() of the value each document has in the + given field. + + Used to get field values. + + Which field contains the shorts. + + Computes short for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as integers and returns an array + of size reader.maxDoc() of the value each document + has in the given field. + + Used to get field values. + + Which field contains the integers. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as integers and returns an array of + size reader.maxDoc() of the value each document has in the + given field. + + Used to get field values. + + Which field contains the integers. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if + none is found, reads the terms in field as floats and returns an array + of size reader.maxDoc() of the value each document + has in the given field. + + Used to get field values. + + Which field contains the floats. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if + none is found, reads the terms in field as floats and returns an array + of size reader.maxDoc() of the value each document + has in the given field. 
+ + Used to get field values. + + Which field contains the floats. + + Computes float for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as longs and returns an array + of size reader.maxDoc() of the value each document + has in the given field. + + + Used to get field values. + + Which field contains the longs. + + The values in the given field for each document. + + java.io.IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as longs and returns an array of + size reader.maxDoc() of the value each document has in the + given field. + + + Used to get field values. + + Which field contains the longs. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is + found, reads the terms in field as integers and returns an array + of size reader.maxDoc() of the value each document + has in the given field. + + + Used to get field values. + + Which field contains the doubles. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none is found, + reads the terms in field as doubles and returns an array of + size reader.maxDoc() of the value each document has in the + given field. + + + Used to get field values. + + Which field contains the doubles. + + Computes integer for string values. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none + is found, reads the term values in field and returns an array + of size reader.maxDoc() containing the value each document + has in the given field. + + Used to get field values. + + Which field contains the strings. + + The values in the given field for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if none + is found reads the term values in field and returns + an array of them in natural order, along with an array telling + which element in the term array each document uses. + + Used to get field values. + + Which field contains the strings. + + Array of terms and index into the array for each document. + + IOException If any error occurs. + + + Checks the internal cache for an appropriate entry, and if + none is found reads field to see if it contains integers, longs, floats + or strings, and then calls one of the other methods in this class to get the + values. For string values, a StringIndex is returned. After + calling this method, there is an entry in the cache for both + type AUTO and the actual found type. + + Used to get field values. + + Which field contains the values. + + int[], long[], float[] or StringIndex. + + IOException If any error occurs. + Please specify the exact type, instead. + Especially, guessing does not work with the new + {@link NumericField} type. + + + + Checks the internal cache for an appropriate entry, and if none + is found reads the terms out of field and calls the given SortComparator + to get the sort values. A hit in the cache will happen if reader, + field, and comparator are the same (using equals()) + as a previous call to this method. 
+ + Used to get field values. + + Which field contains the values. + + Used to convert terms into something to sort by. + + Array of sort objects, one for each document. + + IOException If any error occurs. + Please implement {@link + FieldComparatorSource} directly, instead. + + + + EXPERT: Generates an array of CacheEntry objects representing all items + currently in the FieldCache. +

+ NOTE: These CacheEntry objects maintain a strong reference to the + Cached Values. Maintaining references to a CacheEntry after the IndexReader + associated with it has been garbage collected will prevent the Value itself + from being garbage collected when the Cache drops the WeakReference. +

+

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ +

+ EXPERT: Instructs the FieldCache to forcibly expunge all entries + from the underlying caches. This is intended only to be used for + test methods as a way to ensure a known base state of the Cache + (without needing to rely on GC to free WeakReferences). + It should not be relied on for "Cache maintenance" in general + application code. +

+

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
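+ A hedged lookup sketch for the per-document value arrays documented above (in the
+ 2.9-era Lucene.Net port the shared instance is typically reached as
+ FieldCache_Fields.DEFAULT; reader and the field names are placeholders):
+
+    var cache = Lucene.Net.Search.FieldCache_Fields.DEFAULT;
+    int[] years     = cache.GetInts(reader, "year");      // one parsed int per document
+    string[] titles = cache.GetStrings(reader, "title");  // one string per document, or null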
+ + If non-null, FieldCacheImpl will warn whenever + entries are created that are not sane according to + {@link Lucene.Net.Util.FieldCacheSanityChecker}. + + + + counterpart of {@link #SetInfoStream(PrintStream)} + + + Will be removed in 3.0, this is for binary compatibility only + + + + Will be removed in 3.0, this is for binary compatibility only + + + + Use {@link FieldCache.LongParser}, this will be removed in Lucene 3.0 + + + + Interface to parse long from document fields. + + + Use {@link FieldCache.LongParser}, this will be removed in Lucene 3.0 + + + + Marker interface as super-interface to all parsers. It + is used to specify a custom parser to {@link + SortField#SortField(String, FieldCache.Parser)}. + + + + Return an long representation of this field's value. + + + Use {@link FieldCache.DoubleParser}, this will be removed in Lucene 3.0 + + + + Interface to parse doubles from document fields. + + + Use {@link FieldCache.DoubleParser}, this will be removed in Lucene 3.0 + + + + Return an long representation of this field's value. + + + Token Manager Error. + + + Lexical error occurred. + + + An attempt was made to create a second instance of a static token manager. + + + Tried to change to an invalid lexical state. + + + Detected (and bailed out of) an infinite loop in the token manager. + + + Indicates the reason why the exception is thrown. It will have + one of the above 4 values. + + + + Replaces unprintable characters by their escaped (or unicode escaped) + equivalents in the given string + + + + Returns a detailed message for the Error when it is thrown by the + token manager to indicate a lexical error. + Parameters : + EOFSeen : indicates if EOF caused the lexical error + curLexState : lexical state in which this error occurred + errorLine : line number when the error occurred + errorColumn : column number when the error occurred + errorAfter : prefix that was seen before this error occurred + curchar : the offending character + Note: You can customize the lexical error message by modifying this method. + + + + No arg constructor. + + + Constructor with message and reason. + + + Full Constructor. + + + You can also modify the body of this method to customize your error messages. + For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + of end-users concern, so you can return something like : + + "Internal Error : Please file a bug report .... " + + from this method for such cases in the release version of your parser. + + + + An IndexReader which reads multiple indexes, appending their content. + + + $Id: MultiReader.java 782406 2009-06-07 16:31:18Z mikemccand $ + + + +

Construct a MultiReader aggregating the named set of (sub)readers. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the subreaders.

+

Note that all subreaders are closed if this MultiReader is closed.

+

+ set of (sub)readers + + IOException +
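+ A brief construction sketch for this constructor (dir1 and dir2 are placeholder
+ Directory instances; closing the MultiReader also closes these subreaders):
+
+    var readers = new Lucene.Net.Index.IndexReader[]
+    {
+        Lucene.Net.Index.IndexReader.Open(dir1, true),    // read-only subreader
+        Lucene.Net.Index.IndexReader.Open(dir2, true)
+    };
+    var multi = new Lucene.Net.Index.MultiReader(readers);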
+ +

Construct a MultiReader aggregating the named set of (sub)readers. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the subreaders.

+

+ indicates whether the subreaders should be closed + when this MultiReader is closed + + set of (sub)readers + + IOException +
+ + Tries to reopen the subreaders. +
+ If one or more subreaders could be re-opened (i.e. subReader.reopen() + returned a new instance != subReader), then a new MultiReader instance + is returned; otherwise this instance is returned. +

+ A re-opened instance might share one or more subreaders with the old + instance. Index modification operations result in undefined behavior + when performed before the old instance is closed. + (see {@link IndexReader#Reopen()}). +

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. + +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Clones the subreaders. + (see {@link IndexReader#clone()}). +
+

+ If subreaders are shared, then the reference count of those + readers is increased to ensure that the subreaders remain open + until the last referring reader is closed. +

+
+ + If clone is true then we clone each of the subreaders + + + New IndexReader, or same one (this) if + reopen/clone is not necessary + + CorruptIndexException + IOException + + + + + + + Checks recursively if all subreaders are up to date. + + + Not implemented. + UnsupportedOperationException + + + Remaps docIDs after a merge has completed, where the + merged segments had at least one deletion. This is used + to renumber the buffered deletes in IndexWriter when a + merge of segments with deletions commits. + + + + Used by DocumentsWriter to merge the postings from + multiple ThreadStates when creating a segment + + + + Add a new position & payload. If payloadLength > 0 + you must read those bytes from the IndexInput. + + + + Called when we are done adding positions & payloads + + + This is a DocFieldConsumer that inverts each field, + separately, from a Document, and accepts a + InvertedTermsConsumer to process those terms. + + + + This is a DocFieldConsumer that inverts each field, + separately, from a Document, and accepts a + InvertedTermsConsumer to process those terms. + + + + Called when DocumentsWriter decides to create a new + segment + + + + Called when DocumentsWriter decides to close the doc + stores + + + + Called when an aborting exception is hit + + + Add a new thread + + + Called when DocumentsWriter is using too much RAM. + The consumer should free RAM, if possible, returning + true if any RAM was in fact freed. + + + + Class to write byte streams into slices of shared + byte[]. This is used by DocumentsWriter to hold the + posting list for many terms in RAM. + + + + Set up the writer to write at address. + + + Write byte into byte slice stream + + + Abstract base class for input from a file in a {@link Directory}. A + random-access input stream. Used for all Lucene index input operations. + + + + + + Reads and returns a single byte. + + + + + Reads a specified number of bytes into an array at the specified offset. + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + + + Reads a specified number of bytes into an array at the + specified offset with control over whether the read + should be buffered (callers who have their own buffer + should pass in "false" for useBuffer). Currently only + {@link BufferedIndexInput} respects this parameter. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + set to false if the caller will handle + buffering. + + + + + + Reads four bytes and returns an int. + + + + + Reads an int stored in variable-length format. Reads between one and + five bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Reads eight bytes and returns a long. + + + + + Reads a long stored in variable-length format. Reads between one and + nine bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + Call this if readString should read characters stored + in the old modified UTF8 format (length in java chars + and java's modified UTF8 encoding). This is used for + indices written pre-2.4 See LUCENE-510 for details. + + + + Reads a string. + + + + + Reads Lucene's old "modified UTF-8" encoded + characters into an array. 
+ + the array to read characters into + + the offset in the array to start storing characters + + the number of characters to read + + + + -- please use readString or readBytes + instead, and construct the string + from those utf8 bytes + + + + Expert + + Similar to {@link #ReadChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in. It still + has to invoke {@link #ReadByte()} just as {@link #ReadChars(char[], int, int)} does, but it does not need a buffer to store anything + and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine + how many more bytes to read + + The number of chars to read + + this method operates on old "modified utf8" encoded + strings + + + + Closes the stream to further operations. + + Returns the current position in this file, where the next read will + occur. + + + + Sets current position in this file, where the next read will occur. + + + + The number of bytes in the file. + + + Returns a clone of this stream. + +

Clones of a stream access the same data, and are positioned at the same + point as the stream they were cloned from. + +

Expert: Subclasses must ensure that clones may be positioned at + different points in the input from each other and from the stream they + were cloned from. +

+
+ + An Analyzer that uses {@link WhitespaceTokenizer}. + + + This class wraps a Token and supplies a single attribute instance + where the delegate token can be replaced. + + Will be removed, when old TokenStream API is removed. + + + + The term text of a Token. + + + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use {@link #TermBuffer()} and {@link + #TermLength()} directly instead. If you really need a + String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use {@link + #ResizeTermBuffer(int)} to increase it. After + altering the buffer be sure to call {@link + #setTermLength} to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + {@link #SetTermBuffer(char[], int, int)}, + {@link #SetTermBuffer(String)}, or + {@link #SetTermBuffer(String, int, int)} + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use {@link #ResizeTermBuffer(int)} first. + + the truncated length + + + + A Token's lexical type. The Default value is "word". + + + Returns this Token's lexical type. Defaults to "word". + + + Set the lexical type. + + + + + The positionIncrement determines the position of this token + relative to the previous Token in a TokenStream, used in phrase + searching. + +

The default value is one. + +

Some common uses for this are:
  • Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token. (See the sketch after this list.)
  • Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words.
+
+
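+ A hedged sketch of the zero-increment idea from the first bullet, written against the
+ deprecated Token-returning filter API of this release (Stem is a placeholder; a real
+ filter would plug in an actual stemmer):
+
+    class StemOverlayFilter : Lucene.Net.Analysis.TokenFilter
+    {
+        private Lucene.Net.Analysis.Token pending;    // extra stem waiting to be emitted
+
+        public StemOverlayFilter(Lucene.Net.Analysis.TokenStream input) : base(input) { }
+
+        public override Lucene.Net.Analysis.Token Next()
+        {
+            if (pending != null) { var t = pending; pending = null; return t; }
+            var token = input.Next();
+            if (token == null) return null;
+            string stem = Stem(token.TermText());     // placeholder stemmer call
+            if (stem != null && stem != token.TermText())
+            {
+                pending = new Lucene.Net.Analysis.Token(stem, token.StartOffset(), token.EndOffset());
+                pending.SetPositionIncrement(0);       // same position as the surface form
+            }
+            return token;
+        }
+
+        private static string Stem(string term) { return null; }  // placeholder
+    }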
+ + Set the position increment. The default value is one. + + + the distance from the prior term + + + + Returns the position increment of this Token. + + + + + This attribute can be used to pass different flags down the {@link Tokenizer} chain, + eg from one TokenFilter to another one. + + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#Type()}, although they do share similar purposes. + The flags can be used to encode information about the token for use by other {@link Lucene.Net.Analysis.TokenFilter}s. + + +

+ The bits + +
+ + + + + + The payload of a Token. See also {@link Payload}. + + + Returns this Token's payload. + + + Sets this Token's payload. + + + The positionIncrement determines the position of this token + relative to the previous Token in a {@link TokenStream}, used in phrase + searching. + +

The default value is one. + +

Some common uses for this are:
  • Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token.
  • Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words.
+
+
+ + Set the position increment. The default value is one. + + + the distance from the prior term + + + + Returns the position increment of this Token. + + + + + A filter that replaces accented characters in the ISO Latin 1 character set + (ISO-8859-1) by their unaccented equivalent. The case will not be altered. +

+ For instance, 'à' will be replaced by 'a'. +

+ +

+ in favor of {@link ASCIIFoldingFilter} which covers a superset + of Latin 1. This class will be removed in Lucene 3.0. + +
+ + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + To replace accented characters in a String by unaccented equivalents. + + + CharReader is a Reader wrapper. It reads chars from + Reader and outputs {@link CharStream}, defining an + identify function {@link #CorrectOffset} method that + simply returns the provided offset. + + + + CharStream adds {@link #CorrectOffset} + functionality over {@link Reader}. All Tokenizers accept a + CharStream instead of {@link Reader} as input, which enables + arbitrary character based filtering before tokenization. + The {@link #CorrectOffset} method fixed offsets to account for + removal or insertion of characters, so that the offsets + reported in the tokens match the character offsets of the + original Reader. + + + + Called by CharFilter(s) and Tokenizer to correct token offset. + + + offset as seen in the output + + corrected offset based on the input + + + + Simple cache implementation that uses a HashMap to store (key, value) pairs. + This cache is not synchronized, use {@link Cache#SynchronizedCache(Cache)} + if needed. + + + + Returns a Set containing all keys in this cache. + + + + The maximum number of items to cache. + + + + + The list to efficiently maintain the LRU state. + + + + + The dictionary to hash into any location in the list. + + + + + The node instance to use/re-use when adding an item to the cache. + + + + + Container to hold the key and value to aid in removal from + the dictionary when an item is removed from cache. + + + + Methods for manipulating arrays. + + + Parses the string argument as if it was an int value and returns the + result. Throws NumberFormatException if the string does not represent an + int quantity. + + + a string representation of an int quantity. + + int the value represented by the argument + + NumberFormatException if the argument could not be parsed as an int quantity. + + + Parses a char array into an int. + the character array + + The offset into the array + + The length + + the int + + NumberFormatException if it can't parse + + + Parses the string argument as if it was an int value and returns the + result. Throws NumberFormatException if the string does not represent an + int quantity. The second argument specifies the radix to use when parsing + the value. + + + a string representation of an int quantity. + + the base to use for conversion. + + int the value represented by the argument + + NumberFormatException if the argument could not be parsed as an int quantity. + + + Returns hash of chars in range start (inclusive) to + end (inclusive) + + + + Returns hash of chars in range start (inclusive) to + end (inclusive) + + + + Expert: allocate a new buffer. + Subclasses can allocate differently. + + size of allocated buffer. + + allocated buffer. + + + + + Base class for Directory implementations that store index + files in the file system. There are currently three core + subclasses: + + + + Unfortunately, because of system peculiarities, there is + no single overall best implementation. Therefore, we've + added the {@link #open} method, to allow Lucene to choose + the best FSDirectory implementation given your + environment, and the known limitations of each + implementation. 
For users who have no reason to prefer a + specific implementation, it's best to simply use {@link + #open}. For all others, you should instantiate the + desired implementation directly. + +

The locking implementation is by default {@link + NativeFSLockFactory}, but can be changed by + passing in a custom {@link LockFactory} instance. + The deprecated getDirectory methods default to use + {@link SimpleFSLockFactory} for backwards compatibility. + The system properties + org.apache.lucene.store.FSDirectoryLockFactoryClass + and org.apache.lucene.FSDirectory.class + are deprecated and only used by the deprecated + getDirectory methods. The system property + org.apache.lucene.lockDir is ignored completely. + If you really want to store locks + elsewhere, you can create your own {@link + SimpleFSLockFactory} (or {@link NativeFSLockFactory}, + etc.) passing in your preferred lock directory. + +

In 3.0 this class will become abstract. + +

+ + +
+ + This cache of directories ensures that there is a unique Directory + instance per path, so that synchronization on the Directory can be used to + synchronize access between readers and writers. We use + refcounts to ensure when the last use of an FSDirectory + instance for a given canonical path is closed, we remove the + instance from the cache. See LUCENE-776 + for some relevant discussion. + + Not used by any non-deprecated methods anymore + + + + Set whether Lucene's use of lock files is disabled. By default, + lock files are enabled. They should only be disabled if the index + is on a read-only medium like a CD-ROM. + + Use a {@link #open(File, LockFactory)} or a constructor + that takes a {@link LockFactory} and supply + {@link NoLockFactory#getNoLockFactory}. This setting does not work + with {@link #open(File)} only the deprecated getDirectory + respect this setting. + + + + Returns whether Lucene's use of lock files is disabled. + true if locks are disabled, false if locks are enabled. + + + + Use a constructor that takes a {@link LockFactory} and + supply {@link NoLockFactory#getNoLockFactory}. + + + + The default class which implements filesystem-based directories. + + + A buffer optionally used in renameTo method + + + Returns the directory instance for the named location. + + + Use {@link #Open(File)} + + + the path to the directory. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use {@link #Open(File, LockFactory)} + + + the path to the directory. + + instance of {@link LockFactory} providing the + locking implementation. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use {@link #Open(File)} + + + the path to the directory. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use {@link #Open(File)} + + + the path to the directory. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use {@link #Open(File, LockFactory)} + + + the path to the directory. + + instance of {@link LockFactory} providing the + locking implementation. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use {@link #Open(File, LockFactory)} + + + the path to the directory. + + instance of {@link LockFactory} providing the + locking implementation. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use IndexWriter's create flag, instead, to + create a new index. + + + the path to the directory. + + if true, create, or erase any existing contents. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use IndexWriter's create flag, instead, to + create a new index. + + + the path to the directory. + + if true, create, or erase any existing contents. + + the FSDirectory for the named file. + + + + Returns the directory instance for the named location. + + + Use IndexWriter's create flag, instead, to + create a new index. + + + the path to the directory. + + if true, create, or erase any existing contents. + + the FSDirectory for the named file. + + + + + + + + Initializes the directory to create a new file with the given name. + This method should be used in {@link #createOutput}. 
+ + + + The underlying filesystem directory + + + + + + + + + + + Create a new FSDirectory for the named location (ctor for subclasses). + the path of the directory + + the lock factory to use, or null for the default + ({@link NativeFSLockFactory}); + + IOException + + + Creates an FSDirectory instance, trying to pick the + best implementation given the current environment. + The directory returned uses the {@link NativeFSLockFactory}. + +

Currently this returns {@link SimpleFSDirectory} as + NIOFSDirectory is currently not supported. + +


NOTE: this method may suddenly change which + implementation is returned from release to release, in + the event that higher performance defaults become + possible; if the precise implementation is important to + your application, please instantiate it directly, + instead. On 64 bit systems, it may also be good to + return {@link MMapDirectory}, but this is disabled + because of officially missing unmap support in Java. + For optimal performance you should consider using + this implementation on 64 bit JVMs. + +

See above +

+
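+ A short usage sketch for Open (a sketch only: the index path is a placeholder, and
+ depending on the exact 2.9.x build the overload takes a System.IO.FileInfo or
+ System.IO.DirectoryInfo for the path):
+
+    var dir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"));
+    var searcher = new Lucene.Net.Search.IndexSearcher(dir, true);   // true = read-only
+    try
+    {
+        var top = searcher.Search(
+            new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("body", "lucene")), 10);
+    }
+    finally
+    {
+        searcher.Close();
+        dir.Close();
+    }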
+ + Creates an FSDirectory instance, trying to pick the + best implementation given the current environment. + The directory returned uses the {@link NativeFSLockFactory}. + +

Currently this returns {@link SimpleFSDirectory} as + NIOFSDirectory is currently not supported. + +

NOTE: this method may suddenly change which + implementation is returned from release to release, in + the event that higher performance defaults become + possible; if the precise implementation is important to + your application, please instantiate it directly, + instead. On 64 bit systems, it may also be good to + return {@link MMapDirectory}, but this is disabled + because of officially missing unmap support in Java. + For optimal performance you should consider using + this implementation on 64 bit JVMs. + +

See above +

+
+ + Just like {@link #Open(File)}, but allows you to + also specify a custom {@link LockFactory}. + + + + Lists all files (not subdirectories) in the + directory. This method never returns null (throws + {@link IOException} instead). + + + NoSuchDirectoryException if the directory + does not exist, or does exist but is not a + directory. + + IOException if list() returns null + + + Lists all files (not subdirectories) in the + directory. This method never returns null (throws + {@link IOException} instead). + + + NoSuchDirectoryException if the directory + does not exist, or does exist but is not a + directory. + + IOException if list() returns null + + + Lists all files (not subdirectories) in the + directory. + + + + + + Returns true iff a file with the given name exists. + + + Returns the time the named file was last modified. + + + Returns the time the named file was last modified. + + + Set the modified time of an existing file to now. + + + Returns the length in bytes of a file in the directory. + + + Removes an existing file in the directory. + + + Renames an existing file in the directory. + Warning: This is not atomic. + + + + + + Creates an IndexOutput for the file with the given name. + In 3.0 this method will become abstract. + + + + Creates an IndexInput for the file with the given name. + In 3.0 this method will become abstract. + + + + So we can do some byte-to-hexchar conversion below + + + Closes the store to future operations. + + + For debug output. + + + Default read chunk size. This is a conditional + default: on 32bit JVMs, it defaults to 100 MB. On + 64bit JVMs, it's Integer.MAX_VALUE. + + + + + + Sets the maximum number of bytes read at once from the + underlying file during {@link IndexInput#readBytes}. + The default value is {@link #DEFAULT_READ_CHUNK_SIZE}; + +

This was introduced due to Sun + JVM Bug 6478546, which throws an incorrect + OutOfMemoryError when attempting to read too many bytes + at once. It only happens on 32bit JVMs with a large + maximum heap size.

+ +

Changes to this value will not impact any + already-opened {@link IndexInput}s. You should call + this before attempting to open an index on the + directory.

+ +

NOTE: This value should be as large as + possible to reduce any possible performance impact. If + you still encounter an incorrect OutOfMemoryError, + try lowering the chunk size.

+

+
+ + The maximum number of bytes to read at once from the + underlying file during {@link IndexInput#readBytes}. + + + + + + Use SimpleFSDirectory.SimpleFSIndexInput instead + + + + Base implementation class for buffered {@link IndexInput}. + + + Default buffer size + + + Inits BufferedIndexInput with a specific bufferSize + + + Change the buffer size used by this IndexInput + + + + + + + Expert: implements buffer refill. Reads bytes from the current position + in the input. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + Expert: implements seek. Sets current position in this file, where the + next {@link #ReadInternal(byte[],int,int)} will occur. + + + + + + A straightforward implementation of {@link FSDirectory} + using java.io.RandomAccessFile. However, this class has + poor concurrent performance (multiple threads will + bottleneck) as it synchronizes when multiple threads + read from the same file. It's usually better to use + {@link NIOFSDirectory} or {@link MMapDirectory} instead. + + + + Create a new SimpleFSDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new SimpleFSDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new SimpleFSDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + + + + + Create a new SimpleFSDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + Creates an IndexOutput for the file with the given name. + + + Creates an IndexInput for the file with the given name. + + + Please use ctor taking chunkSize + + + + Please use ctor taking chunkSize + + + + IndexInput methods + + + Method used for testing. Returns true if the underlying + file descriptor is valid. + + + + Base implementation class for buffered {@link IndexOutput}. + + + Abstract base class for output to a file in a Directory. A random-access + output stream. Used for all Lucene index output operations. + + + + + + + + Writes a single byte. + + + + + Writes an array of bytes. + the bytes to write + + the number of bytes to write + + + + + + Writes an array of bytes. + the bytes to write + + the offset in the byte array + + the number of bytes to write + + + + + + Writes an int as four bytes. + + + + + Writes an int in a variable-length format. Writes between one and + five bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Writes a long as eight bytes. + + + + + Writes an long in a variable-length format. Writes between one and five + bytes. Smaller values take fewer bytes. Negative numbers are not + supported. + + + + + + Writes a string. + + + + + Writes a sub sequence of characters from s as the old + format (modified UTF-8 encoded bytes). + + the source of the characters + + the first character in the sequence + + the number of characters in the sequence + + -- please pre-convert to utf8 bytes + instead or use {@link #writeString} + + + + Writes a sub sequence of characters from char[] as + the old format (modified UTF-8 encoded bytes). 
+ + the source of the characters + + the first character in the sequence + + the number of characters in the sequence + + -- please pre-convert to utf8 bytes instead or use {@link #writeString} + + + + Copy numBytes bytes from input to ourself. + + + Forces any buffered output to be written. + + + Closes this stream to further operations. + + + Returns the current position in this file, where the next write will + occur. + + + + + + Sets current position in this file, where the next write will occur. + + + + + The number of bytes in the file. + + + Set the file length. By default, this method does + nothing (it's optional for a Directory to implement + it). But, certain Directory implementations (for + + can use this to inform the + underlying IO system to pre-allocate the file to the + specified size. If the length is longer than the + current file length, the bytes added to the file are + undefined. Otherwise the file is truncated. + + file length + + + + Writes a single byte. + + + + + Writes an array of bytes. + the bytes to write + + the number of bytes to write + + + + + + Forces any buffered output to be written. + + + Expert: implements buffer write. Writes bytes at the current position in + the output. + + the bytes to write + + the number of bytes to write + + + + Expert: implements buffer write. Writes bytes at the current position in + the output. + + the bytes to write + + the offset in the byte array + + the number of bytes to write + + + + Closes this stream to further operations. + + + Returns the current position in this file, where the next write will + occur. + + + + + + Sets current position in this file, where the next write will occur. + + + + + The number of bytes in the file. + + + output methods: + + + Random-access methods + + + + + + + + + + + + + + + + + + + Use SimpleFSDirectory.SimpleFSIndexOutput instead + + + + + + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified wildcard filter term. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. + +

+ $Id: WildcardTermEnum.java 783371 2009-06-10 14:39:56Z mikemccand $ + +
+ + ***************************************** + String equality with support for wildcards + ****************************************** + + + + Creates a new WildcardTermEnum. +

+ After calling the constructor the enumeration is already pointing to the first + valid term if such a term exists. +

+
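+ For context, wildcard matching is usually reached through WildcardQuery rather than by
+ instantiating this enum directly (a sketch; searcher, field and pattern are illustrative):
+
+    // '?' matches exactly one character, '*' matches zero or more characters.
+    var query = new Lucene.Net.Search.WildcardQuery(new Lucene.Net.Index.Term("name", "te?t*"));
+    var top = searcher.Search(query, 20);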
+ + Determines if a word matches a wildcard pattern. + Work released by Granta Design Ltd after originally being done on + company time. + + + + A Filter that restricts search results to a range of values in a given + field. + +

This filter matches the documents looking for terms that fall into the + supplied range according to {@link String#compareTo(String)}. It is not intended + for numerical ranges, use {@link NumericRangeFilter} instead. + +

If you construct a large number of range filters with different ranges but on the + same field, {@link FieldCacheRangeFilter} may have significantly better performance. +

+ 2.9 + +
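+ A usage sketch for the string-based range described above (field name, bounds, searcher
+ and query are illustrative):
+
+    // Matches documents whose "date" terms fall in ["20090101", "20091231"], both ends inclusive.
+    var filter = new Lucene.Net.Search.TermRangeFilter("date", "20090101", "20091231", true, true);
+    var top = searcher.Search(query, filter, 10);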
+ + The field this range applies to + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + The collator to use when determining range inclusion; set + to null to use Unicode code point ordering instead of collation. + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + Constructs a filter for field fieldName matching + less than or equal to upperTerm. + + + + Constructs a filter for field fieldName matching + greater than or equal to lowerTerm. + + + + Returns the field name for this filter + + + Returns the lower value of this range filter + + + Returns the upper value of this range filter + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the collator used to determine range inclusion, if any. + + + Removes matches which overlap with another SpanQuery. + + + Base class for span-based queries. + + + Expert: Returns the matches for this query in an index. Used internally + to search for spans. + + + + Returns the name of the field matched by this query. + + + Returns a collection of all terms matched by this query. + use extractTerms instead + + + + + + Construct a SpanNotQuery matching spans from include which + have no overlap with spans from exclude. + + + + Return the SpanQuery whose matches are filtered. + + + Return the SpanQuery whose matches must not overlap those returned. + + + Returns a collection of all terms matched by this query. + use extractTerms instead + + + + + + Returns true iff o is equal to this. + + + Expert: Scoring functionality for phrase queries. +
A document is considered matching if it contains the phrase-query terms + at "valid" positions. What "valid positions" are + depends on the type of the phrase query: for an exact phrase query, terms are required + to appear in adjacent locations, while for a sloppy phrase query some distance between + the terms is allowed. The abstract method {@link #PhraseFreq()} of extending classes + is invoked for each document containing all the phrase query terms, in order to + compute the frequency of the phrase query in that document. A non-zero frequency + means a match.
+
+ + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + For a document containing all the phrase query terms, compute the + frequency of the phrase in that document. + A non zero frequency means a match. +
Note that containing all phrase terms does not guarantee a match; they have to be found in matching locations. 
+ frequency of the phrase in the current doc, or 0 if not found. +
+ + Implements search over a set of Searchables. + +

Applications usually need only call the inherited {@link #Search(Query)} + or {@link #Search(Query,Filter)} methods. +

+
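+ Illustrative sketch (assuming the Java Lucene 2.9 API these docs derive from and two hypothetical index directories):
+
+     Searchable part1 = new IndexSearcher(FSDirectory.open(new File("/tmp/index1")), true);
+     Searchable part2 = new IndexSearcher(FSDirectory.open(new File("/tmp/index2")), true);
+     MultiSearcher searcher = new MultiSearcher(new Searchable[] { part1, part2 });
+     TopDocs hits = searcher.search(new TermQuery(new Term("body", "lucene")), 10);   // searches both indexes
+     searcher.close();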
+ + An abstract base class for search implementations. Implements the main search + methods. + +

+ Note that you can only access hits from a Searcher as long as it is not yet + closed; otherwise an IOException will be thrown. +

+
+ + Returns the documents matching query. + BooleanQuery.TooManyClauses + Hits will be removed in Lucene 3.0. Use + {@link #Search(Query, Filter, int)} instead. + + + + Returns the documents matching query and + filter. + + BooleanQuery.TooManyClauses + Hits will be removed in Lucene 3.0. Use + {@link #Search(Query, Filter, int)} instead. + + + + Returns documents matching query sorted by + sort. + + BooleanQuery.TooManyClauses + Hits will be removed in Lucene 3.0. Use + {@link #Search(Query, Filter, int, Sort)} instead. + + + + Returns documents matching query and filter, + sorted by sort. + + BooleanQuery.TooManyClauses + Hits will be removed in Lucene 3.0. Use + {@link #Search(Query, Filter, int, Sort)} instead. + + + + Search implementation with arbitrary sorting. Finds + the top n hits for query, applying + filter if non-null, and sorting the hits by the criteria in + sort. + +

NOTE: this does not compute scores by default; use + {@link IndexSearcher#setDefaultFieldSortScoring} to enable scoring. + +

+ BooleanQuery.TooManyClauses +
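+ Illustrative sketch of the sorting variant (assuming a searcher and query as in the earlier sketches and a hypothetical single-token "date" field):
+
+     // top 20 hits ordered by the "date" terms, newest first; scores are not computed by default
+     Sort byDate = new Sort(new SortField("date", SortField.STRING, true));
+     TopFieldDocs hits = searcher.search(query, null, 20, byDate);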
+ + Lower-level search API. + +

{@link HitCollector#Collect(int,float)} is called for every matching + document. + +

Applications should only use this if they need all of the + matching documents. The high-level search API ({@link + Searcher#Search(Query)}) is usually more efficient, as it skips + non-high-scoring hits. +

Note: The score passed to this method is a raw score. + In other words, the score will not necessarily be a float whose value is + between 0 and 1. +

+ BooleanQuery.TooManyClauses + use {@link #Search(Query, Collector)} instead. + +
+ + Lower-level search API. + +

{@link Collector#Collect(int)} is called for every matching document. + +

Applications should only use this if they need all of the matching + documents. The high-level search API ({@link Searcher#Search(Query, int)} + ) is usually more efficient, as it skips non-high-scoring hits. +

Note: The score passed to this method is a raw score. + In other words, the score will not necessarily be a float whose value is + between 0 and 1. +

+ BooleanQuery.TooManyClauses +
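+ Illustrative sketch of the Collector-based API (assuming the 2.9 Collector contract described above; the counter and field names are hypothetical):
+
+     final int[] count = new int[1];
+     searcher.search(query, new Collector() {
+         public void setScorer(Scorer scorer) { }                 // scores are not needed here
+         public void collect(int doc) { count[0]++; }             // doc is relative to the current reader
+         public void setNextReader(IndexReader reader, int docBase) { }
+         public boolean acceptsDocsOutOfOrder() { return true; }  // any order is fine for counting
+     });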
+ + Lower-level search API. + +

{@link HitCollector#Collect(int,float)} is called for every matching + document. +
HitCollector-based access to remote indexes is discouraged. + +

Applications should only use this if they need all of the + matching documents. The high-level search API ({@link + Searcher#Search(Query, Filter, int)}) is usually more efficient, as it skips + non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses + use {@link #Search(Query, Filter, Collector)} instead. + +
+ + Lower-level search API. + +

{@link Collector#Collect(int)} is called for every matching + document. +
Collector-based access to remote indexes is discouraged. + +

Applications should only use this if they need all of the + matching documents. The high-level search API ({@link + Searcher#Search(Query, Filter, int)}) is usually more efficient, as it skips + non-high-scoring hits. + +

+ to match documents + + if non-null, used to permit documents to be collected. + + to receive hits + + BooleanQuery.TooManyClauses +
+ + Finds the top n + hits for query, applying filter if non-null. + + + BooleanQuery.TooManyClauses + + + Finds the top n + hits for query. + + + BooleanQuery.TooManyClauses + + + Returns an Explanation that describes how doc scored against + query. + +

This is intended to be used in developing Similarity implementations, + and, for good performance, should not be displayed with every hit. + Computing an explanation is as expensive as executing the query over the + entire index. +

+
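+ Illustrative sketch (assuming a searcher, query and hits as in the earlier sketches):
+
+     // expensive - intended for debugging scoring, not for every hit
+     Explanation explanation = searcher.explain(query, hits.scoreDocs[0].doc);
+     System.out.println(explanation.toString());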
+ + The Similarity implementation used by this searcher. + + + Expert: Set the Similarity implementation used by this Searcher. + + + + + + + Expert: Return the Similarity implementation used by this Searcher. + +

This defaults to the current value of {@link Similarity#GetDefault()}. +

+
+ + creates a weight for query + new weight + + + + use {@link #Search(Weight, Filter, Collector)} instead. + + + + Creates a searcher which searches searchers. + + + Return the array of {@link Searchable}s this searches. + + + Returns index of the searcher for document n in the array + used to construct this searcher. + + + + Returns the document number of document n within its + sub-index. + + + + Create weight in multiple index scenario. + + Distributed query processing is done in the following steps: + 1. rewrite query + 2. extract necessary terms + 3. collect dfs for these terms from the Searchables + 4. create query weight using aggregate dfs. + 5. distribute that weight to Searchables + 6. merge results + + Steps 1-4 are done here, 5+6 in the search() methods + + + rewritten queries + + + + Document Frequency cache acting as a Dummy-Searcher. This class is no + full-fledged Searcher, but only supports the methods necessary to + initialize Weights. + + + + Expert: obtains the ordinal of the field value from the default Lucene + {@link Lucene.Net.Search.FieldCache FieldCache} using getStringIndex() + and reverses the order. +

+ The native lucene index order is used to assign an ordinal value for each field value. +

+ Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. +
+ Example of reverse ordinal (rord): +
If there were only three field values: "apple","banana","pear" +
then rord("apple")=3, rord("banana")=2, ord("pear")=1 +

+ WARNING: + rord() depends on the position in an index and can thus change + when other documents are inserted or deleted, + or if a MultiSearcher is used. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if {@link #getValues} is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API. Alternatively, for + a short-term fix, you could wrap your ValueSource using + {@link MultiValueSource}, which costs more CPU per lookup + but will not consume double the FieldCache RAM.

+

+
+ + Constructor for a certain field. + field whose values' reverse order is used. + + + + Expert: obtains float field values from the + {@link Lucene.Net.Search.FieldCache FieldCache} + using getFloats() and makes those values + available as other numeric types, casting as needed. +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements on the field. +

NOTE: with the switch in 2.9 to segment-based + searching, if {@link #getValues} is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API. Alternatively, for + a short-term fix, you could wrap your ValueSource using + {@link MultiValueSource}, which costs more CPU per lookup + but will not consume double the FieldCache RAM.

+ + + +

Expert: A base class for ValueSource implementations that retrieve values for + a single field from the {@link Lucene.Net.Search.FieldCache FieldCache}. +

+ Fields used herein must be indexed (it doesn't matter whether these fields are stored or not).

+ It is assumed that each such indexed field is untokenized, or at least has a single token in a document. + For documents with multiple tokens of the same field, behavior is undefined (It is likely that current + code would use the value of one of these tokens, but this is not guaranteed). +

+ Documents with no tokens in this field are assigned the Zero value. +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

NOTE: with the switch in 2.9 to segment-based + searching, if {@link #getValues} is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API. Alternatively, for + a short-term fix, you could wrap your ValueSource using + {@link MultiValueSource}, which costs more CPU per lookup + but will not consume double the FieldCache RAM.

+

+
+ + Create a cached field source for the input field. + + + Return cached DocValues for input field and reader. + FieldCache so that values of a field are loaded once per reader (RAM allowing) + + Field for which values are required. + + + + + + Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal. + + + + + Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field + and the cache (those are taken care of elsewhere). + + + + + + Create a cached float field source with default string-to-float parser. + + + Create a cached float field source with a specific string-to-float parser. + + + A query that applies a filter to the results of another query. + +

Note: the bits are retrieved from the filter each time this + query is used in a search - use a CachingWrapperFilter to avoid + regenerating the bits every time. + +

Created: Apr 20, 2004 8:58:29 AM + +

+ 1.4 + + $Id: FilteredQuery.java 807821 2009-08-25 21:55:49Z mikemccand $ + + + +
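+ Illustrative sketch (assuming the Java Lucene 2.9 API these docs derive from and the hypothetical "body"/"date" fields used in the earlier sketches):
+
+     Query inner = new TermQuery(new Term("body", "lucene"));
+     Filter range = new TermRangeFilter("date", "20090101", "20091231", true, true);
+     Query filtered = new FilteredQuery(inner, range);   // only documents accepted by the filter can match
+     TopDocs hits = searcher.search(filtered, 10);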
+ + Constructs a new query which applies a filter to the results of the original query. + Filter.getDocIdSet() will be called every time this query is used in a search. + + Query to be filtered, cannot be null. + + Filter to apply to query results, cannot be null. + + + + Returns a Weight that applies the filter to the enclosed query's Weight. + This is accomplished by overriding the Scorer returned by the Weight. + + + + Rewrites the wrapped query. + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + use {@link #NextDoc()} instead. + + + + use {@link #DocID()} instead. + + + + use {@link #Advance(int)} instead. + + + + Wraps another SpanFilter's result and caches it. The purpose is to allow + filters to simply filter, and then wrap with this class to add caching. + + + + A transient Filter cache. + + + Filter to cache results of + + + + Use {@link #GetDocIdSet(IndexReader)} instead. + + + + This exception is thrown when parse errors are encountered. + You can explicitly create objects of this exception type by + calling the method generateParseException in the generated + parser. + + You can modify this class to customize your error reporting + mechanisms so long as you retain the public fields. + + + + This constructor is used by the method "generateParseException" + in the generated parser. Calling this constructor generates + a new object of this type with the fields "currentToken", + "expectedTokenSequences", and "tokenImage" set. The boolean + flag "specialConstructor" is also set to true to indicate that + this constructor was used to create this object. + This constructor calls its super class with the empty string + to force the "toString" method of parent class "Throwable" to + print the error message in the form: + ParseException: <result of getMessage> + + + + The following constructors are for use by you for whatever + purpose you can think of. Constructing the exception in this + manner makes the exception behave in the normal way - i.e., as + documented in the class "Throwable". The fields "errorToken", + "expectedTokenSequences", and "tokenImage" do not contain + relevant information. The JavaCC generated code does not use + these constructors. + + + + Constructor with message. + + + Constructor with message. + + + This variable determines which constructor was used to create + this object and thereby affects the semantics of the + "getMessage" method (see below). + + + + This is the last token that has been consumed successfully. If + this object has been created due to a parse error, the token + followng this token will (therefore) be the first error token. + + + + Each entry in this array is an array of integers. Each array + of integers represents a sequence of tokens (by their ordinal + values) that is expected at this point of the parse. + + + + This is a reference to the "tokenImage" array of the generated + parser within which the parse error occurred. This array is + defined in the generated ...Constants interface. + + + + The end of line string for this machine. + + + Used to convert raw characters to their escaped version + when these raw version cannot be used as part of an ASCII + string literal. + + + + This method has the standard behavior when this object has been + created using the standard constructors. Otherwise, it uses + "currentToken" and "expectedTokenSequences" to generate a parse + error message and returns it. 
If this object has been created + due to a parse error, and you do not catch it (it gets thrown + from the parser), then this method is called during the printing + of the final stack trace, and hence the correct error message + gets displayed. + + + + Default implementation of Message interface. + For Native Language Support (NLS), system of software internationalization. + + + + Message Interface for a lazy loading. + For Native Language Support (NLS), system of software internationalization. + + + + A TermInfo is the record of information stored for a term. + + + The number of documents which contain the term. + + +

[Note that as of 2.1, all but one of the + methods in this class are available via {@link + IndexWriter}. The one method that is not available is + {@link #DeleteDocument(int)}.]

+ + A class to modify an index, i.e. to delete and add documents. This + class hides {@link IndexReader} and {@link IndexWriter} so that you + do not need to care about implementation details such as that adding + documents is done via IndexWriter and deletion is done via IndexReader. + +

Note that you cannot create more than one IndexModifier object + on the same directory at the same time. + +

Example usage: + + + + + +

+ + + + + + +
+ +     Analyzer analyzer = new StandardAnalyzer();
+     // create an index in /tmp/index, overwriting an existing one:
+     IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
+     Document doc = new Document();
+     doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+     doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.ANALYZED));
+     indexModifier.addDocument(doc);
+     int deleted = indexModifier.delete(new Term("id", "1"));
+     System.out.println("Deleted " + deleted + " document");
+     indexModifier.flush();
+     System.out.println(indexModifier.docCount() + " docs in index");
+     indexModifier.close();
+
+
+ + + +

Not all methods of IndexReader and IndexWriter are offered by this + class. If you need access to additional methods, either use those classes + directly or implement your own class that extends IndexModifier. + +

Although an instance of this class can be used from more than one + thread, you will not get the best performance. You might want to use + IndexReader and IndexWriter directly for that (but you will then need to + take care of synchronization yourself). +

While you can freely mix calls to add() and delete() using this class, + you should batch your calls for best performance. For example, if you + want to update 20 documents, you should first delete all those documents, + then add all the new documents. +

+ Please use {@link IndexWriter} instead. + +
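+ Illustrative sketch of the same add/delete cycle using IndexWriter directly, as recommended above (assuming the Java Lucene 2.9 API these docs derive from; path and field values are hypothetical):
+
+     IndexWriter writer = new IndexWriter(FSDirectory.open(new File("/tmp/index")),
+         new StandardAnalyzer(Version.LUCENE_29), true, IndexWriter.MaxFieldLength.UNLIMITED);
+     Document doc = new Document();
+     doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+     doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.ANALYZED));
+     writer.addDocument(doc);
+     writer.deleteDocuments(new Term("id", "1"));   // delete by the unique id term
+     writer.commit();
+     writer.close();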
+ + Open an index with write access. + + + the index directory + + the analyzer to use for adding new documents + + true to create the index or overwrite the existing one; + false to append to the existing index + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Open an index with write access. + + + the index directory + + the analyzer to use for adding new documents + + true to create the index or overwrite the existing one; + false to append to the existing index + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Open an index with write access. + + + the index directory + + the analyzer to use for adding new documents + + true to create the index or overwrite the existing one; + false to append to the existing index + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Initialize an IndexWriter. + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Throw an IllegalStateException if the index is closed. + IllegalStateException + + + Close the IndexReader and open an IndexWriter. + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Close the IndexWriter and open an IndexReader. + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Make sure all changes are written to disk. + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Adds a document to this index, using the provided analyzer instead of the + one specific in the constructor. If the document contains more than + {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are + discarded. + + + + IllegalStateException if the index is closed + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Adds a document to this index. If the document contains more than + {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are + discarded. + + + + IllegalStateException if the index is closed + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Deletes all documents containing term. + This is useful if one uses a document field to hold a unique ID string for + the document. Then to delete such a document, one merely constructs a + term with the appropriate field and the unique ID string as its text and + passes it to this method. Returns the number of documents deleted. 
+ + the number of documents deleted + + + + IllegalStateException if the index is closed + StaleReaderException if the index has changed + since this reader was opened + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Deletes the document numbered docNum. + + + StaleReaderException if the index has changed + since this reader was opened + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IllegalStateException if the index is closed + + + Returns the number of documents currently in this + index. If the writer is currently open, this returns + {@link IndexWriter#DocCount()}, else {@link + IndexReader#NumDocs()}. But, note that {@link + IndexWriter#DocCount()} does not take deletions into + account, unlike {@link IndexReader#numDocs}. + + IllegalStateException if the index is closed + + + Merges all segments together into a single segment, optimizing an index + for search. + + + + IllegalStateException if the index is closed + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + If non-null, information about merges and a message when + {@link #GetMaxFieldLength()} is reached will be printed to this. +

Example: index.setInfoStream(System.err); +

+ + + IllegalStateException if the index is closed +
+ + + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Setting to turn on usage of a compound file. When on, multiple files + for each segment are merged into a single file once the segment creation + is finished. This is done regardless of what directory is in use. + + + + IllegalStateException if the index is closed + + + + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + The maximum number of terms that will be indexed for a single field in a + document. This limits the amount of memory required for indexing, so that + collections with very large files will not crash the indexing process by + running out of memory.

+ Note that this effectively truncates large documents, excluding from the + index terms that occur further in the document. If you know your source + documents are large, be sure to set this value high enough to accommodate + the expected size. If you set it to Integer.MAX_VALUE, then the only limit + is your memory, but you should anticipate an OutOfMemoryError.

+ By default, no more than 10,000 terms will be indexed for a field. +

+ + + IllegalStateException if the index is closed +
+ + + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Determines the minimal number of documents required before the buffered + in-memory documents are merged and a new Segment is created. + Since Documents are merged in a {@link Lucene.Net.Store.RAMDirectory}, + a large value gives faster indexing. At the same time, mergeFactor limits + the number of files open in an FSDirectory. +

The default value is 10. + +

+ + + IllegalStateException if the index is closed + IllegalArgumentException if maxBufferedDocs is smaller than 2 +
+ + + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Determines how often segment indices are merged by addDocument(). With + smaller values, less RAM is used while indexing, and searches on + unoptimized indices are faster, but indexing speed is slower. With larger + values, more RAM is used during indexing, and while searches on unoptimized + indices are slower, indexing is faster. Thus larger values (> 10) are best + for batch index creation, and smaller values (< 10) for indices that are + interactively maintained. +

This must never be less than 2. The default value is 10. + +

+ + + IllegalStateException if the index is closed +
+ + + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if there is a low-level IO error + + + Close this index, writing all pending changes to disk. + + + IllegalStateException if the index has been closed before already + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Combines multiple files into a single compound file. + The file format:
+
  • VInt fileCount
  • {Directory} fileCount entries with the following structure:
      • long dataOffset
      • String fileName
  • {File Data} fileCount entries with the raw data of the corresponding file
+ + The fileCount integer indicates how many files are contained in this compound + file. The {directory} that follows has that many entries. Each directory entry + contains a long pointer to the start of this file's data section, and a String + with that file's name. + + +
+ $Id: CompoundFileWriter.java 690539 2008-08-30 17:33:06Z mikemccand $ + +
+ + Create the compound stream in the specified file. The file name is the + entire name (no extensions are added). + + NullPointerException if dir or name is null + + + Returns the directory of the compound file. + + + Returns the name of the compound file. + + + Add a source stream. file is the string by which the + sub-stream will be known in the compound stream. + + + IllegalStateException if this writer is closed + NullPointerException if file is null + IllegalArgumentException if a file with the same name + has been added already + + + + Merge files with the extensions added up to now. + All files with these extensions are combined sequentially into the + compound stream. After successful merge, the source files + are deleted. + + IllegalStateException if close() had been called before or + if no file has been added to this object + + + + Copy the contents of the file with specified extension into the + provided output stream. Use the provided buffer for moving data + to reduce memory allocation. + + + + source file + + + temporary holder for the start of directory entry for this file + + + temporary holder for the start of this file's data section + + + Class for accessing a compound stream. + This class implements a directory, but is limited to only read operations. + Directory methods that would normally modify data throw an exception. + + + + $Id: CompoundFileReader.java 673371 2008-07-02 11:57:27Z mikemccand $ + + + + Returns an array of strings, one for each file in the directory. + + + Returns true iff a file with the given name exists. + + + Returns the time the compound file was last modified. + + + Set the modified time of the compound file to now. + + + Not implemented + UnsupportedOperationException + + + Not implemented + UnsupportedOperationException + + + Returns the length of a file in the directory. + IOException if the file does not exist + + + Not implemented + UnsupportedOperationException + + + Not implemented + UnsupportedOperationException + + + Implementation of an IndexInput that reads from a portion of the + compound file. The visibility is left as "package" *only* because + this helps with testing since JUnit test cases in a different class + can then access package fields of this class. + + + + Expert: implements buffer refill. Reads bytes from the current + position in the input. + + the array to read bytes into + + the offset in the array to start storing bytes + + the number of bytes to read + + + + Expert: implements seek. Sets current position in this file, where + the next {@link #ReadInternal(byte[],int,int)} will occur. + + + + + + Closes the stream to further operations. + + + The payload of a Token. See also {@link Payload}. + + + Initialize this attribute with no payload. + + + Initialize this attribute with the given payload. + + + Returns this Token's payload. + + + Sets this Token's payload. + + + Normalizes tokens extracted with {@link StandardTokenizer}. + + + Construct filtering in. + + + Returns the next token in the stream, or null at EOS. +

Removes 's from the end of words. +

Removes dots from acronyms. +

+
+ + Simplistic {@link CharFilter} that applies the mappings + contained in a {@link NormalizeCharMap} to the character + stream, and corrects the resulting changes to the + offsets. + + + + Base utility class for implementing a {@link CharFilter}. + You subclass this, and then record mappings by calling + {@link #addOffCorrectMap}, and then invoke the correct + method to correct an offset. +

NOTE: This class is not particularly efficient. + For example, a new class instance is created for every + call to {@link #addOffCorrectMap}, which is then appended + to a private list. +

+
+ + Subclasses of CharFilter can be chained to filter CharStream. + They can be used as {@link java.io.Reader} with additional offset + correction. {@link Tokenizer}s will automatically use {@link #CorrectOffset} + if a CharFilter/CharStream subclass is used. + + + $Id$ + + + + + Subclass may want to override to correct the current offset. + + + current offset + + corrected offset + + + + Chains the corrected offset through the input + CharFilter. + + + + Retrieve the corrected offset. Note that this method + is slow, if you correct positions far before the most + recently added position, as it's a simple linear + search backwards through all offset corrections added + by {@link #addOffCorrectMap}. + + + + Default constructor that takes a {@link CharStream}. + + + Easy-use constructor that takes a {@link Reader}. + + + Estimates the size of a given Object using a given MemoryModel for primitive + size information. + + Resource Usage: + + Internally uses a Map to temporally hold a reference to every + object seen. + + If checkIntered, all Strings checked will be interned, but those + that were not already interned will be released for GC when the + estimate is complete. + + + + Constructs this object with an AverageGuessMemoryModel and + checkInterned = true. + + + + check if Strings are interned and don't add to size + if they are. Defaults to true but if you know the objects you are checking + won't likely contain many interned Strings, it will be faster to turn off + intern checking. + + + + MemoryModel to use for primitive object sizes. + + + + MemoryModel to use for primitive object sizes. + + check if Strings are interned and don't add to size + if they are. Defaults to true but if you know the objects you are checking + won't likely contain many interned Strings, it will be faster to turn off + intern checking. + + + + Return good default units based on byte size. + + + Provides support for converting byte sequences to Strings and back again. + The resulting Strings preserve the original byte sequences' sort order. + + The Strings are constructed using a Base 8000h encoding of the original + binary data - each char of an encoded String represents a 15-bit chunk + from the byte sequence. Base 8000h was chosen because it allows for all + lower 15 bits of char to be used without restriction; the surrogate range + [U+D8000-U+DFFF] does not represent valid chars, and would require + complicated handling to avoid them and allow use of char's high bit. + + Although unset bits are used as padding in the final char, the original + byte sequence could contain trailing bytes with no set bits (null bytes): + padding is indistinguishable from valid information. To overcome this + problem, a char is appended, indicating the number of encoded bytes in the + final content char. + + This class's operations are defined over CharBuffers and ByteBuffers, to + allow for wrapped arrays to be reused, reducing memory allocation costs for + repeated operations. Note that this class calls array() and arrayOffset() + on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be + used. This class interprets the arrayOffset() and limit() values returned by + its input buffers as beginning and end+1 positions on the wrapped array, + resprectively; similarly, on the output buffer, arrayOffset() is the first + position written to, and limit() is set to one past the final output array + position. + + + + Returns the number of chars required to encode the given byte sequence. 
+ + + The byte sequence to be encoded. Must be backed by an array. + + The number of chars required to encode the given byte sequence + + IllegalArgumentException If the given ByteBuffer is not backed by an array + + + Returns the number of bytes required to decode the given char sequence. + + + The char sequence to be encoded. Must be backed by an array. + + The number of bytes required to decode the given char sequence + + IllegalArgumentException If the given CharBuffer is not backed by an array + + + Encodes the input byte sequence into the output char sequence. Before + calling this method, ensure that the output CharBuffer has sufficient + capacity by calling {@link #GetEncodedLength(java.nio.ByteBuffer)}. + + + The byte sequence to encode + + Where the char sequence encoding result will go. The limit + is set to one past the position of the final char. + + IllegalArgumentException If either the input or the output buffer + is not backed by an array + + + + Decodes the input char sequence into the output byte sequence. Before + calling this method, ensure that the output ByteBuffer has sufficient + capacity by calling {@link #GetDecodedLength(java.nio.CharBuffer)}. + + + The char sequence to decode + + Where the byte sequence decoding result will go. The limit + is set to one past the position of the final char. + + IllegalArgumentException If either the input or the output buffer + is not backed by an array + + + + Decodes the given char sequence, which must have been encoded by + {@link #Encode(java.nio.ByteBuffer)} or + {@link #Encode(java.nio.ByteBuffer, java.nio.CharBuffer)}. + + + The char sequence to decode + + A byte sequence containing the decoding result. The limit + is set to one past the position of the final char. + + IllegalArgumentException If the input buffer is not backed by an + array + + + + Encodes the input byte sequence. + + + The byte sequence to encode + + A char sequence containing the encoding result. The limit is set + to one past the position of the final char. + + IllegalArgumentException If the input buffer is not backed by an + array + + + + A memory-resident {@link IndexInput} implementation. + + + $Id: RAMInputStream.java 632120 2008-02-28 21:13:59Z mikemccand $ + + + + This exception is thrown when the write.lock + could not be acquired. This + happens when a writer tries to open an index + that another writer already has open. + + + + + + Expert-only. Public for use by other weight implementations + + + Stores information about how to sort documents by terms in an individual + field. Fields must be indexed in order to sort by them. + +

Created: Feb 11, 2004 1:25:29 PM + +

+ lucene 1.4 + + $Id: SortField.java 801344 2009-08-05 18:05:06Z yonik $ + + + +
+ + Sort by document score (relevancy). Sort values are Float and higher + values are at the front. + + + + Sort by document number (index order). Sort values are Integer and lower + values are at the front. + + + + Guess type of sort based on field contents. A regular expression is used + to look at the first term indexed for the field and determine if it + represents an integer number, a floating point number, or just arbitrary + string characters. + + Please specify the exact type, instead. + Especially, guessing does not work with the new + {@link NumericField} type. + + + + Sort using term values as Strings. Sort values are String and lower + values are at the front. + + + + Sort using term values as encoded Integers. Sort values are Integer and + lower values are at the front. + + + + Sort using term values as encoded Floats. Sort values are Float and + lower values are at the front. + + + + Sort using term values as encoded Longs. Sort values are Long and + lower values are at the front. + + + + Sort using term values as encoded Doubles. Sort values are Double and + lower values are at the front. + + + + Sort using term values as encoded Shorts. Sort values are Short and + lower values are at the front. + + + + Sort using a custom Comparator. Sort values are any Comparable and + sorting is done according to natural order. + + + + Sort using term values as encoded Bytes. Sort values are Byte and + lower values are at the front. + + + + Sort using term values as Strings, but comparing by + value (using String.compareTo) for all comparisons. + This is typically slower than {@link #STRING}, which + uses ordinals to do the sorting. + + + + Represents sorting by document score (relevancy). + + + Represents sorting by document number (index order). + + + Creates a sort by terms in the given field where the type of term value + is determined dynamically ({@link #AUTO AUTO}). + + Name of field to sort by, cannot be + null. + + Please specify the exact type instead. + + + + Creates a sort, possibly in reverse, by terms in the given field where + the type of term value is determined dynamically ({@link #AUTO AUTO}). + + Name of field to sort by, cannot be null. + + True if natural order should be reversed. + + Please specify the exact type instead. + + + + Creates a sort by terms in the given field with the type of term + values explicitly given. + + Name of field to sort by. Can be null if + type is SCORE or DOC. + + Type of values in the terms. + + + + Creates a sort, possibly in reverse, by terms in the given field with the + type of term values explicitly given. + + Name of field to sort by. Can be null if + type is SCORE or DOC. + + Type of values in the terms. + + True if natural order should be reversed. + + + + Creates a sort by terms in the given field, parsed + to numeric values using a custom {@link FieldCache.Parser}. + + Name of field to sort by. Must not be null. + + Instance of a {@link FieldCache.Parser}, + which must subclass one of the existing numeric + parsers from {@link FieldCache}. Sort type is inferred + by testing which numeric parser the parser subclasses. + + IllegalArgumentException if the parser fails to + subclass an existing numeric parser, or field is null + + + + Creates a sort, possibly in reverse, by terms in the given field, parsed + to numeric values using a custom {@link FieldCache.Parser}. + + Name of field to sort by. Must not be null. 
+ + Instance of a {@link FieldCache.Parser}, + which must subclass one of the existing numeric + parsers from {@link FieldCache}. Sort type is inferred + by testing which numeric parser the parser subclasses. + + True if natural order should be reversed. + + IllegalArgumentException if the parser fails to + subclass an existing numeric parser, or field is null + + + + Creates a sort by terms in the given field sorted + according to the given locale. + + Name of field to sort by, cannot be null. + + Locale of values in the field. + + + + Creates a sort, possibly in reverse, by terms in the given field sorted + according to the given locale. + + Name of field to sort by, cannot be null. + + Locale of values in the field. + + + + Creates a sort with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + use SortField (String field, FieldComparatorSource comparator) + + + + Creates a sort with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + + + Creates a sort, possibly in reverse, with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + True if natural order should be reversed. + + use SortField (String field, FieldComparatorSource comparator, boolean reverse) + + + + Creates a sort, possibly in reverse, with a custom comparison function. + Name of field to sort by; cannot be null. + + Returns a comparator for sorting hits. + + True if natural order should be reversed. + + + + Returns the name of the field. Could return null + if the sort is by SCORE or DOC. + + Name of field, possibly null. + + + + Returns the type of contents in the field. + One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT. + + + + Returns the Locale by which term values are interpreted. + May return null if no Locale was specified. + + Locale, or null. + + + + Returns the instance of a {@link FieldCache} parser that fits to the given sort type. + May return null if no parser was specified. Sorting is using the default parser then. + + An instance of a {@link FieldCache} parser, or null. + + + + Returns whether the sort should be reversed. + True if natural order should be reversed. + + + + use {@link #GetComparatorSource()} + + + + Use legacy IndexSearch implementation: search with a DirectoryReader rather + than passing a single hit collector to multiple SegmentReaders. + + + true for legacy behavior + + will be removed in Lucene 3.0. + + + + if true, IndexSearch will use legacy sorting search implementation. + eg. multiple Priority Queues. + + will be removed in Lucene 3.0. + + + + Returns true if o is equal to this. If a + {@link SortComparatorSource} (deprecated) or {@link + FieldCache.Parser} was provided, it must properly + implement equals (unless a singleton is always used). + + + + Returns true if o is equal to this. If a + {@link SortComparatorSource} (deprecated) or {@link + FieldCache.Parser} was provided, it must properly + implement hashCode (unless a singleton is always + used). + + + + + Lucene.Net specific. Needed for Serialization + + + + + + + Lucene.Net specific. Needed for deserialization + + + + + + Returns the {@link FieldComparator} to use for + sorting. + + NOTE: This API is experimental and might change in + incompatible ways in the next release. + + + number of top hits the queue will store + + position of this SortField within {@link + Sort}. 
The comparator is primary if sortPos==0, + secondary if sortPos==1, etc. Some comparators can + optimize themselves when they are the primary sort. + + {@link FieldComparator} to use when sorting + + + + Attempts to detect the given field type for an IndexReader. + + + + + Collapse the hash table & sort in-place. + + + Compares term text for two Posting instance and + returns -1 if p1 < p2; 1 if p1 > p2; else 0. + + + + Test whether the text for current RawPostingList p equals + current tokenText. + + + + Called when postings hash is too small (> 50% + occupied) or too large (< 20% occupied). + + + + Optimized implementation. + + + Overridden by SegmentTermPositions to skip in prox stream. + + + Optimized implementation. + + + Called by super.skipTo(). + + + + Stemmer, implementing the Porter Stemming Algorithm + + The Stemmer class transforms a word into its root form. The input + word can be provided a character at time (by calling add()), or at once + by calling one of the various stem(something) methods. + + + + reset() resets the stemmer so it can stem another word. If you invoke + the stemmer by calling add(char) and then stem(), you must call reset() + before starting another word. + + + + Add a character to the word being stemmed. When you are finished + adding characters, you can call stem(void) to process the word. + + + + After a word has been stemmed, it can be retrieved by toString(), + or a reference to the internal buffer can be retrieved by getResultBuffer + and getResultLength (which is generally more efficient.) + + + + Returns the length of the word resulting from the stemming process. + + + Returns a reference to a character buffer containing the results of + the stemming process. You also need to consult getResultLength() + to determine the length of the result. + + + + Stem a word provided as a String. Returns the result as a String. + + + Stem a word contained in a char[]. Returns true if the stemming process + resulted in a word different from the input. You can retrieve the + result with getResultLength()/getResultBuffer() or toString(). + + + + Stem a word contained in a portion of a char[] array. Returns + true if the stemming process resulted in a word different from + the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Stem a word contained in a leading portion of a char[] array. + Returns true if the stemming process resulted in a word different + from the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Stem the word placed into the Stemmer buffer through calls to add(). + Returns true if the stemming process resulted in a word different + from the input. You can retrieve the result with + getResultLength()/getResultBuffer() or toString(). + + + + Test program for demonstrating the Stemmer. It reads a file and + stems each word, writing the result to standard out. + Usage: Stemmer file-name + + + + Implements {@link LockFactory} for a single in-process instance, + meaning all locking will take place through this one instance. + Only use this {@link LockFactory} when you are certain all + IndexReaders and IndexWriters for a given index are running + against a single shared in-process Directory instance. This is + currently the default locking for RAMDirectory. + + + + + + +

Base class for locking implementations. {@link Directory} uses + instances of this class to implement locking.

+ +

Note that there are some useful tools to verify that + your LockFactory is working correctly: {@link + VerifyingLockFactory}, {@link LockStressTest}, {@link + LockVerifyServer}.

+ +

+ + + + + + +
+ + Set the prefix in use for all locks created in this + LockFactory. This is normally called once, when a + Directory gets this LockFactory instance. However, you + can also call this (after this instance is assigned to + a Directory) to override the prefix in use. This + is helpful if you're running Lucene on machines that + have different mount points for the same shared + directory. + + + + Get the prefix in use for all locks created in this LockFactory. + + + Return a new Lock instance identified by lockName. + name of the lock to be created. + + + + Attempt to clear (forcefully unlock and remove) the + specified lock. Only call this at a time when you are + certain this lock is no longer in use. + + name of the lock to be cleared. + + + + An interprocess mutex lock. +

Typical use might look like:

+            new Lock.With(directory.makeLock("my.lock")) {
+                public Object doBody() {
+                    ... code to execute while locked ...
+                }
+            }.run();
+            
+ + +
+ $Id: Lock.java 769409 2009-04-28 14:05:43Z mikemccand $ + + + +
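+ Illustrative sketch of explicit obtain/release, an alternative to the Lock.With idiom shown above (assuming a directory instance; the lock name is hypothetical):
+
+     Lock lock = directory.makeLock("my.lock");
+     if (lock.obtain(Lock.LOCK_OBTAIN_WAIT_FOREVER)) {
+         try {
+             // ... code to execute while locked ...
+         } finally {
+             lock.release();
+         }
+     }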
+ + Pass this value to {@link #Obtain(long)} to try + forever to obtain the lock. + + + + How long {@link #Obtain(long)} waits, in milliseconds, + in between attempts to acquire the lock. + + + + Attempts to obtain exclusive access and immediately return + upon success or failure. + + true iff exclusive access is obtained + + + + If a lock obtain called, this failureReason may be set + with the "root cause" Exception as to why the lock was + not obtained. + + + + Attempts to obtain an exclusive lock within amount of + time given. Polls once per {@link #LOCK_POLL_INTERVAL} + (currently 1000) milliseconds until lockWaitTimeout is + passed. + + length of time to wait in + milliseconds or {@link + #LOCK_OBTAIN_WAIT_FOREVER} to retry forever + + true if lock was obtained + + LockObtainFailedException if lock wait times out + IllegalArgumentException if lockWaitTimeout is + out of bounds + + IOException if obtain() throws IOException + + + Releases exclusive access. + + + Returns true if the resource is currently locked. Note that one must + still call {@link #Obtain()} before using the resource. + + + + Utility class for executing code with exclusive access. + + + Constructs an executor that will grab the named lock. + + + Code to execute with exclusive access. + + + Calls {@link #doBody} while lock is obtained. Blocks if lock + cannot be obtained immediately. Retries to obtain lock once per second + until it is obtained, or until it has tried ten times. Lock is released when + {@link #doBody} exits. + + LockObtainFailedException if lock could not + be obtained + + IOException if {@link Lock#obtain} throws IOException + + +

Implements {@link LockFactory} using {@link + File#createNewFile()}.

+ +

NOTE: the javadocs + for File.createNewFile contain a vague + yet spooky warning about not using the API for file + locking. This warning was added due to this + bug, and in fact the only known problem with using + this API for locking is that the Lucene write lock may + not be released when the JVM exits abnormally.

+

When this happens, a {@link LockObtainFailedException} + is hit when trying to create a writer, in which case you + need to explicitly clear the lock file first. You can + either manually remove the file, or use the {@link + org.apache.lucene.index.IndexReader#unlock(Directory)} + API. But, first be certain that no writer is in fact + writing to the index otherwise you can easily corrupt + your index.

+ +

If you suspect that this or any other LockFactory is + not working properly in your environment, you can easily + test it by using {@link VerifyingLockFactory}, {@link + LockVerifyServer} and {@link LockStressTest}.

+ +

+ + +
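+ Illustrative sketch (assuming the Java Lucene 2.9 API these docs derive from; the path is hypothetical):
+
+     // open an FSDirectory with an explicit SimpleFSLockFactory; by default the lock dir is the index dir
+     File path = new File("/tmp/index");
+     FSDirectory dir = FSDirectory.open(path, new SimpleFSLockFactory(path));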
+ + Base class for file system based locking implementation. + + + Directory for the lock files. + + + Set the lock directory. This method can only be called + once to initialize the lock directory. It is used by {@link FSDirectory} + to set the lock directory to itself. + Subclasses can also use this method to set the directory + in the constructor. + + + + Retrieve the lock directory. + + + Create a SimpleFSLockFactory instance, with null (unset) + lock directory. When you pass this factory to a {@link FSDirectory} + subclass, the lock directory is automatically set to the + directory itself. Be sure to create one instance for each directory + you create! + + + + Instantiate using the provided directory (as a File instance). + where lock files should be created. + + + + Instantiate using the provided directory (as a File instance). + where lock files should be created. + + + + Instantiate using the provided directory name (String). + where lock files should be created. + + + + A {@link Collector} implementation that collects the top-scoring hits, + returning them as a {@link TopDocs}. This is used by {@link IndexSearcher} to + implement {@link TopDocs}-based search. Hits are sorted by score descending + and then (when the scores are tied) docID ascending. When you create an + instance of this collector you should know in advance whether documents are + going to be collected in doc Id order or not. +

NOTE: The values {@link Float#NaN} and + {@link Float#NEGATIVE_INFINITY} are not valid scores. This + collector will not properly collect hits with such + scores.

+
+ + Creates a new {@link TopScoreDocCollector} given the number of hits to + collect and whether documents are scored in order by the input + {@link Scorer} to {@link #SetScorer(Scorer)}. + +

NOTE: The instances returned by this method + pre-allocate a full array of length + numHits, and fill the array with sentinel + objects. +

+
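+ Illustrative sketch (assuming a searcher and query as in the earlier sketches):
+
+     // 'true' because the scorer is expected to deliver documents in increasing doc id order here
+     TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
+     searcher.search(query, collector);
+     TopDocs hits = collector.topDocs();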
+ + + + + + + + Provides access to stored term vector of + a document field. The vector consists of the name of the field, an array of the terms tha occur in the field of the + {@link Lucene.Net.Documents.Document} and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the + frequency of getTerms()[5], assuming there are at least 5 terms in the Document. + + + + The {@link Lucene.Net.Documents.Fieldable} name. + The name of the field this vector is associated with. + + + + + The number of terms in the term vector. + + + + An Array of term texts in ascending order. + + + + Array of term frequencies. Locations of the array correspond one to one + to the terms in the array obtained from getTerms + method. Each location in the array contains the number of times this + term occurs in the document or the document field. + + + + Return an index in the term numbers array returned from + getTerms at which the term with the specified + term appears. If this term does not appear in the array, + return -1. + + + + Just like indexOf(int) but searches for a number of terms + at the same time. Returns an array that has the same size as the number + of terms searched for, each slot containing the result of searching for + that term number. + + + array containing terms to look for + + index in the array where the list of terms starts + + the number of terms in the list + + + + + The original list of terms from the query, can contain duplicates + + + + A {@link Collector} implementation which wraps another + {@link Collector} and makes sure only documents with + scores > 0 are collected. + + + + A query that scores each document as the value of the numeric input field. +

+ The query matches all documents, and scores each document according to the numeric + value of that field. +

+ It is assumed, and expected, that: +

  • The field used here is indexed, and has exactly one token in every scored document.
  • Best if this field is un_tokenized.
  • That token is parsable to the selected type.
+

+ Combining this query in a FunctionQuery allows much freedom in affecting document scores. + Note that with this freedom comes responsibility: it is more than likely that the + default Lucene scoring is superior in quality to scoring modified as explained here. + However, in some cases, and certainly for research experiments, this capability may prove useful.

+ When constructing this query, select the appropriate type. That type should match the data stored in the + field. So in fact the "right" type should be selected before indexing. Type selection + has an effect on the RAM usage:

  • {@link Type#BYTE} consumes 1 * maxDocs bytes.
  • {@link Type#SHORT} consumes 2 * maxDocs bytes.
  • {@link Type#INT} consumes 4 * maxDocs bytes.
  • {@link Type#FLOAT} consumes 8 * maxDocs bytes.
+

+ Caching: + Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader. + To take advantage of this, it is extremely important to reuse index-readers or index-searchers; + otherwise, for instance if a new index reader is opened for each query, large penalties will be + paid for loading the field values into memory over and over again! +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
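+ Illustrative sketch (assuming the Java Lucene 2.9 API these docs derive from and a hypothetical "popularity" field indexed with a single parsable float token per document):
+
+     Query scoreByPopularity = new FieldScoreQuery("popularity", FieldScoreQuery.Type.FLOAT);
+     TopDocs hits = searcher.search(scoreByPopularity, 10);   // every document matches; score = field value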
+ + Expert: A Query that sets the scores of document to the + values obtained from a {@link Lucene.Net.Search.Function.ValueSource ValueSource}. +

+ This query provides a score for each and every undeleted document in the index. +

+ The value source can be based on a (cached) value of an indexed field, but it + can also be based on an external source, e.g. values read from an external database. +

+ Score is set as: Score(doc,query) = query.getBoost()² * valueSource(doc). +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
+ Create a value source query.
+ provides the values that define the function to be used for scoring
+
+ Returns true if o is equal to this.
+
+ Returns a hash code value for this object.
+
+ A scorer that (simply) matches all documents, and scores each document with
+ the value of the value source in effect. As an example, if the value source
+ is a (cached) field source, then the value of that field in that document will
+ be used (assuming the field is indexed for this doc, with a single token).
+
+ use {@link #NextDoc()} instead.
+
+ use {@link #DocID()} instead.
+
+ use {@link #Advance(int)} instead.
+
+ Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field.

+ The type param tells how to parse the field string values into a numeric score value. +

+ the numeric field to be used. + + the type of the field: either + {@link Type#BYTE}, {@link Type#SHORT}, {@link Type#INT}, or {@link Type#FLOAT}. + +
+ + Type of score field, indicating how field values are interpreted/parsed. +

+ The type selected at search time should match the data stored in the field.
+ Different types have different RAM requirements:

+     • {@link #BYTE} consumes 1 * maxDocs bytes.
+     • {@link #SHORT} consumes 2 * maxDocs bytes.
+     • {@link #INT} consumes 4 * maxDocs bytes.
+     • {@link #FLOAT} consumes 8 * maxDocs bytes.
+
+ field values are interpreted as numeric byte values.
+
+ field values are interpreted as numeric short values.
+
+ field values are interpreted as numeric int values.
+
+ field values are interpreted as numeric float values.
+
+ Expert: A hit queue for sorting hits by terms in more than one field.
+ Uses FieldCache.DEFAULT for maintaining internal term lookup tables.

Created: Dec 8, 2003 12:56:03 PM + +

+ lucene 1.4 + + $Id: FieldSortedHitQueue.java 803676 2009-08-12 19:31:38Z hossman $ + + + + + + see {@link FieldValueHitQueue} + +
+ + Creates a hit queue sorted by the given list of fields. + Index to use. + + Fieldable names, in priority order (highest priority first). Cannot be null or empty. + + The number of hits to retain. Must be greater than zero. + + IOException + + + Stores a comparator corresponding to each field being sorted by + + + Stores the sort criteria being used. + + + Stores the maximum score value encountered, needed for normalizing. + + + returns the maximum score encountered by elements inserted via insert() + + + Returns whether a is less relevant than b. + ScoreDoc + + ScoreDoc + + true if document a should be sorted after document b. + + + + Given a FieldDoc object, stores the values used + to sort the given document. These values are not the raw + values out of the index, but the internal representation + of them. This is so the given search hit can be collated + by a MultiSearcher with other search hits. + + The FieldDoc to store sort values into. + + The same FieldDoc passed in. + + + + + + Returns the SortFields being used by this hit queue. + + + Internal cache of comparators. Similar to FieldCache, only + caches comparators instead of term values. + + + + Returns a comparator for sorting hits according to a field containing bytes. + Index to use. + + Fieldable containing integer values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to a field containing shorts. + Index to use. + + Fieldable containing integer values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to a field containing integers. + Index to use. + + Fieldable containing integer values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to a field containing integers. + Index to use. + + Fieldable containing integer values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to a field containing floats. + Index to use. + + Fieldable containing float values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to a field containing doubles. + Index to use. + + Fieldable containing float values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to a field containing strings. + Index to use. + + Fieldable containing string values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to a field containing strings. + Index to use. + + Fieldable containing string values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Returns a comparator for sorting hits according to values in the given field. + The terms in the field are looked at to determine whether they contain integers, + floats or strings. Once the type is determined, one of the other static methods + in this class is called to get the comparator. + + Index to use. + + Fieldable containing values. + + Comparator for sorting hits. + + IOException If an error occurs reading the index. + + + Expert: Internal cache. + + + Expert: The default cache implementation, storing all values in memory. 
+ A WeakHashMap is used for storage. + +

Created: May 19, 2004 4:40:36 PM + +

+ lucene 1.4 + + $Id: FieldCacheImpl.java 807572 2009-08-25 11:44:45Z mikemccand $ + +
+ + Will be removed in 3.0, this is for binary compatibility only + + + + Will be removed in 3.0, this is for binary compatibility only + + + + The pattern used to detect float values in a field + removed for java 1.3 compatibility + protected static final Object pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+"); + + + + + + + + EXPERT: A unique Identifier/Description for each item in the FieldCache. + Can be useful for logging/debugging. +

+ EXPERIMENTAL API: This API is considered extremely advanced + and experimental. It may be removed or altered w/o warning in future + releases + of Lucene. +

+

+
+ + + + + + Computes (and stores) the estimated size of the cache Value + + + + + The most recently estimated size of the value, null unless + estimateSize has been called. + + + + Only needed because of Entry (ab)use by + FieldSortedHitQueue, remove when FieldSortedHitQueue + is removed + + + + Only needed because of Entry (ab)use by + FieldSortedHitQueue, remove when FieldSortedHitQueue + is removed + + + + Adds warning to super.toString if Local or sortFieldType were specified + Only needed because of Entry (ab)use by + FieldSortedHitQueue, remove when FieldSortedHitQueue + is removed + + + + Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops + processing terms and returns the current FieldCache + array. + + + + Expert: Internal cache. + + + Expert: Every composite-key in the internal cache is of this type. + + + Only (ab)used by FieldSortedHitQueue, + remove when FieldSortedHitQueue is removed + + + + Only (ab)used by FieldSortedHitQueue, + remove when FieldSortedHitQueue is removed + + + + Only (ab)used by FieldSortedHitQueue, + remove when FieldSortedHitQueue is removed + + + + Creates one of these objects for a custom comparator/parser. + + + Only (ab)used by FieldSortedHitQueue, + remove when FieldSortedHitQueue is removed + + + + Two of these are equal iff they reference the same field and type. + + + Composes a hashcode based on the field and type. + + + Please specify the exact type, instead. + Especially, guessing does not work with the new + {@link NumericField} type. + + + + + + + + A range filter built on top of a cached single term field (in {@link FieldCache}). + +

FieldCacheRangeFilter builds a single cache for the field the first time it is used. + Each subsequent FieldCacheRangeFilter on the same field then reuses this cache, + even if the range itself changes. + +

This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast)
+ than building a {@link TermRangeFilter} (or {@link ConstantScoreRangeQuery} on a {@link TermRangeFilter})
+ for each query, if using {@link #newStringRange}. However, if the range never changes it
+ is slower (around 2x as slow) than building a CachingWrapperFilter on top of a single TermRangeFilter.
+
+ For numeric data types, this filter may be significantly faster than {@link NumericRangeFilter}.
+ Furthermore, it does not need the numeric values encoded by {@link NumericField}. But
+ it has the limitation that it only works with exactly one value per document (see below).

As with all {@link FieldCache} based functionality, FieldCacheRangeFilter is only valid for
+ fields which contain exactly one term per document (except for {@link #newStringRange},
+ where 0 terms are also allowed). Due to a restriction of {@link FieldCache}, for numeric ranges
+ a value of 0 is assumed for all documents that do not have a numeric term in the field.

Thus it works on dates, prices and other single value fields but will not work on + regular text fields. It is preferable to use a NOT_ANALYZED field to ensure that + there is only a single term. + +

This class does not have a constructor; use one of the static factory methods,
+ which create a correct instance for the different data types supported by {@link FieldCache}.

+
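+ For example, a sketch against the 2.9-era API (the "price" and "category" fields are hypothetical; "price" is assumed NOT_ANALYZED with exactly one zero-padded term per document so string order matches numeric order, and the NewStringRange name follows the 2.9 port's Java-style naming):
+
+            // requires: using Lucene.Net.Index; using Lucene.Net.Search;
+            IndexSearcher searcher = new IndexSearcher(dir, true);   // reuse across queries
+            Filter priceRange = FieldCacheRangeFilter.NewStringRange("price", "0010", "0020",
+                                                                     true, true);
+            Query query = new TermQuery(new Term("category", "book"));
+            // the cache for "price" is loaded on the first search and reused afterwards
+            TopDocs hits = searcher.Search(query, priceRange, 10);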
+ + This method is implemented for each data type + + + Creates a string range query using {@link FieldCache#getStringIndex}. This works with all + fields containing zero or one term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetBytes(IndexReader,String)}. This works with all + byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetBytes(IndexReader,String,FieldCache.ByteParser)}. This works with all + byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetShorts(IndexReader,String)}. This works with all + short fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetShorts(IndexReader,String,FieldCache.ShortParser)}. This works with all + short fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetInts(IndexReader,String)}. This works with all + int fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetInts(IndexReader,String,FieldCache.IntParser)}. This works with all + int fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetLongs(IndexReader,String)}. This works with all + long fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetLongs(IndexReader,String,FieldCache.LongParser)}. This works with all + long fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetFloats(IndexReader,String)}. This works with all + float fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetFloats(IndexReader,String,FieldCache.FloatParser)}. This works with all + float fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetDoubles(IndexReader,String)}. This works with all + double fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. + + + + Creates a numeric range query using {@link FieldCache#GetDoubles(IndexReader,String,FieldCache.DoubleParser)}. This works with all + double fields containing exactly one numeric term in the field. The range can be half-open by setting one + of the values to null. 
+ + + + this method checks, if a doc is a hit, should throw AIOBE, when position invalid + + + this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs + + + @deprecated use {@link #NextDoc()} instead. + + + use {@link #Advance(int)} instead. + + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + use {@link #DocID()} instead. + + + + Expert: Describes the score computation for document and query, and + can distinguish a match independent of a positive value. + + + + Expert: Describes the score computation for document and query. + + + Indicates whether or not this Explanation models a good match. + +

+ By default, an Explanation represents a "match" if the value is positive. +

+

+ + +
+ + The value assigned to this explanation node. + + + Sets the value assigned to this explanation node. + + + A description of this explanation node. + + + Sets the description of this explanation node. + + + A short one line summary which should contain all high level + information about this Explanation, without the "Details" + + + + The sub-nodes of this explanation node. + + + Adds a sub-node to this explanation node. + + + Render an explanation as text. + + + Render an explanation as HTML. + + + Small Util class used to pass both an idf factor as well as an + explanation for that factor. + + This class will likely be held on a {@link Weight}, so be aware + before storing any large or un-serializable fields. + + + + + the idf factor + + + + This should be calculated lazily if possible. + + + the explanation for the idf factor. + + + + The match status of this explanation node. + May be null if match status is unknown + + + + Sets the match status assigned to this explanation node. + May be null if match status is unknown + + + + Indicates whether or not this Explanation models a good match. + +

+ If the match status is explicitly set (i.e.: not null) this method + uses it; otherwise it defers to the superclass. +

+

+ + +
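+ As a usage sketch (2.9-era API; the query, the index directory dir, and document number 0 are placeholders):
+
+            // requires: using Lucene.Net.Index; using Lucene.Net.Search;
+            IndexSearcher searcher = new IndexSearcher(dir, true);
+            Query query = new TermQuery(new Term("title", "hello"));
+            Explanation expl = searcher.Explain(query, 0);        // explain the score of doc 0
+            System.Console.WriteLine(expl.IsMatch());             // does this node model a good match?
+            System.Console.WriteLine(expl.ToString());            // full score breakdown as text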
+ + Describes the input token stream. + + + An integer that describes the kind of this token. This numbering + system is determined by JavaCCParser, and a table of these numbers is + stored in the file ...Constants.java. + + + + The line number of the first character of this Token. + + + The column number of the first character of this Token. + + + The line number of the last character of this Token. + + + The column number of the last character of this Token. + + + The string image of the token. + + + A reference to the next regular (non-special) token from the input + stream. If this is the last token from the input stream, or if the + token manager has not read tokens beyond this one, this field is + set to null. This is true only if this token is also a regular + token. Otherwise, see below for a description of the contents of + this field. + + + + This field is used to access special tokens that occur prior to this + token, but after the immediately preceding regular (non-special) token. + If there are no such special tokens, this field is set to null. + When there are more than one such special token, this field refers + to the last of these special tokens, which in turn refers to the next + previous special token through its specialToken field, and so on + until the first special token (whose specialToken field is null). + The next fields of special tokens refer to other special tokens that + immediately follow it (without an intervening regular token). If there + is no such token, this field is null. + + + + An optional attribute value of the Token. + Tokens which are not used as syntactic sugar will often contain + meaningful values that will be used later on by the compiler or + interpreter. This attribute value is often different from the image. + Any subclass of Token that actually wants to return a non-null value can + override this method as appropriate. + + + + No-argument constructor + + + Constructs a new token for the specified Image. + + + Constructs a new token for the specified Image and Kind. + + + Returns the image. + + + Returns a new Token object, by default. However, if you want, you + can create and return subclass objects based on the value of ofKind. + Simply add the cases to the switch for all those special cases. + For example, if you have a subclass of Token called IDToken that + you want to create if ofKind is ID, simply add something like : + + case MyParserConstants.ID : return new IDToken(ofKind, image); + + to the following switch statement. Then you can cast matchedToken + variable to the appropriate type and use sit in your lexical actions. + + + + Call this if the IndexInput passed to {@link #read} + stores terms in the "modified UTF8" (pre LUCENE-510) + format. + + + + This {@link IndexDeletionPolicy} implementation that + keeps only the most recent commit and immediately removes + all prior commits after a new commit is done. This is + the default deletion policy. + + + +

Expert: policy for deletion of stale {@link IndexCommit index commits}. + +

Implement this interface, and pass it to one + of the {@link IndexWriter} or {@link IndexReader} + constructors, to customize when older + {@link IndexCommit point-in-time commits} + are deleted from the index directory. The default deletion policy + is {@link KeepOnlyLastCommitDeletionPolicy}, which always + removes old commits as soon as a new commit is done (this + matches the behavior before 2.2).

+ +

One expected use case for this (and the reason why it + was first created) is to work around problems with an + index directory accessed via filesystems like NFS because + NFS does not provide the "delete on last close" semantics + that Lucene's "point in time" search normally relies on. + By implementing a custom deletion policy, such as "a + commit is only removed once it has been stale for more + than X minutes", you can give your readers time to + refresh to the new commit before {@link IndexWriter} + removes the old commits. Note that doing so will + increase the storage requirements of the index. See LUCENE-710 + for details.

+

+
+ +

This is called once when a writer is first + instantiated to give the policy a chance to remove old + commit points.

+ +

The writer locates all index commits present in the + index directory and calls this method. The policy may + choose to delete some of the commit points, doing so by + calling method {@link IndexCommit#delete delete()} + of {@link IndexCommit}.

+ +

Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that. + +

+ List of current + {@link IndexCommit point-in-time commits}, + sorted by age (the 0th one is the oldest commit). + +
+ +

This is called each time the writer completed a commit. + This gives the policy a chance to remove old commit points + with each commit.

+ +

The policy may now choose to delete old commit points + by calling method {@link IndexCommit#delete delete()} + of {@link IndexCommit}.

+ +

If writer has autoCommit = true then + this method will in general be called many times during + one instance of {@link IndexWriter}. If + autoCommit = false then this method is + only called once when {@link IndexWriter#close} is + called, or not at all if the {@link IndexWriter#abort} + is called. + +

Note: the last CommitPoint is the most recent one, + i.e. the "front index state". Be careful not to delete it, + unless you know for sure what you are doing, and unless + you can afford to lose the index content while doing that. + +

+ List of {@link IndexCommit}, + sorted by age (the 0th one is the oldest commit). + +
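+ A sketch of a custom policy (2.9-era API; the non-generic IList signature and the class name are assumptions based on the description above):
+
+            // requires: using System.Collections; using Lucene.Net.Index;
+            public class KeepNewestOnlyPolicy : IndexDeletionPolicy
+            {
+                public void OnInit(IList commits)
+                {
+                    // keep whatever commit points already exist in the directory
+                }
+
+                public void OnCommit(IList commits)
+                {
+                    // delete every commit point except the most recent (last) one
+                    for (int i = 0; i < commits.Count - 1; i++)
+                        ((IndexCommit) commits[i]).Delete();
+                }
+            }
+
+ Such a policy would be passed to an {@link IndexWriter} constructor that accepts an IndexDeletionPolicy.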
+ + Deletes all commits except the most recent one. + + + Deletes all commits except the most recent one. + + +

Expert: represents a single commit into an index as seen by the + {@link IndexDeletionPolicy} or {@link IndexReader}.

+ +

Changes to the content of an index are made visible + only after the writer who made that change commits by + writing a new segments file + (segments_N). This point in time, when the + action of writing of a new segments file to the directory + is completed, is an index commit.

+ +

Each index commit point has a unique segments file + associated with it. The segments file associated with a + later index commit point would have a larger N.

+ +

WARNING: This API is new and experimental and + may suddenly change.

+

+
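+ For example (sketch, 2.9-era API; the getter names follow the port's GetXxx convention and dir is an existing index Directory):
+
+            // requires: using Lucene.Net.Index;
+            foreach (IndexCommit commit in IndexReader.ListCommits(dir))
+            {
+                System.Console.WriteLine(commit.GetSegmentsFileName()       // e.g. segments_3
+                                         + "  generation=" + commit.GetGeneration()
+                                         + "  version=" + commit.GetVersion());
+            }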
+ + Please subclass IndexCommit class instead + + + + Get the segments file (segments_N) associated + with this commit point. + + + + Returns all index files referenced by this commit point. + + + Delete this commit point. +

+ Upon calling this, the writer is notified that this commit + point should be deleted. +

+ Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect + and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or + {@link IndexDeletionPolicy#onCommit onCommit()} methods. +

+
+ + Get the segments file (segments_N) associated + with this commit point. + + + + Returns all index files referenced by this commit point. + + + Returns the {@link Directory} for the index. + + + Delete this commit point. This only applies when using + the commit point in the context of IndexWriter's + IndexDeletionPolicy. +

+ Upon calling this, the writer is notified that this commit + point should be deleted. +

+ Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect + and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or + {@link IndexDeletionPolicy#onCommit onCommit()} methods. +

+
+ + Returns true if this commit is an optimized index. + + + Two IndexCommits are equal if both their Directory and versions are equal. + + + Returns the version for this IndexCommit. This is the + same value that {@link IndexReader#getVersion} would + return if it were opened on this commit. + + + + Returns the generation (the _N in segments_N) for this + IndexCommit + + + + Convenience method that returns the last modified time + of the segments_N file corresponding to this index + commit, equivalent to + getDirectory().fileModified(getSegmentsFileName()). + + + + Returns userData, previously passed to {@link + IndexWriter#Commit(Map)} for this commit. Map is + String -> String. + + + + This class keeps track of closing the underlying directory. It is used to wrap + DirectoryReaders, that are created using a String/File parameter + in IndexReader.open() with FSDirectory.getDirectory(). + + This helper class is removed with all String/File + IndexReader.open() methods in Lucene 3.0 + + + + A FilterIndexReader contains another IndexReader, which it + uses as its basic source of data, possibly transforming the data along the + way or providing additional functionality. The class + FilterIndexReader itself simply implements all abstract methods + of IndexReader with versions that pass all requests to the + contained index reader. Subclasses of FilterIndexReader may + further override some of these methods and may also provide additional + methods and fields. + + + +

Construct a FilterIndexReader based on the specified base reader. + Directory locking for delete, undeleteAll, and setNorm operations is + left to the base reader.

+

Note that base reader is closed if this FilterIndexReader is closed.

+

+ specified base reader. + +
+ + + + + + Base class for filtering {@link TermDocs} implementations. + + + Base class for filtering {@link TermPositions} implementations. + + + Base class for filtering {@link TermEnum} implementations. + + + This member contains the ref counter, that is passed to each instance after cloning/reopening, + and is global to all DirectoryOwningReader derived from the original one. + This reuses the class {@link SegmentReader.Ref} + + + + Provides support for converting dates to strings and vice-versa. + The strings are structured so that lexicographic sorting orders + them by date, which makes them suitable for use as field values + and search terms. + +

This class also helps you to limit the resolution of your dates. Do not + save dates with a finer resolution than you really need, as then + RangeQuery and PrefixQuery will require more memory and become slower. + +

Compared to {@link DateField} the strings generated by the methods + in this class take slightly more space, unless your selected resolution + is set to Resolution.DAY or lower. + +

+ Another approach is {@link NumericUtils}, which provides + a sortable binary representation (prefix encoded) of numeric values, which + date/time are. + For indexing a {@link Date} or {@link Calendar}, just get the unix timestamp as + long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and + index this as a numeric value with {@link NumericField} + and use {@link NumericRangeQuery} to query it. +

+
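+ For example (sketch, 2.9-era API; the "modified" field and the enclosing document are hypothetical):
+
+            // requires: using System; using Lucene.Net.Documents;
+            string indexed = DateTools.DateToString(DateTime.UtcNow, DateTools.Resolution.DAY);
+            // e.g. "20091125" - lexicographic order now equals chronological order
+            DateTime roundTripped = DateTools.StringToDate(indexed);
+            Document doc = new Document();
+            doc.Add(new Field("modified", indexed, Field.Store.YES, Field.Index.NOT_ANALYZED));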
+ + Converts a Date to a string suitable for indexing. + + + the date to be converted + + the desired resolution, see + {@link #Round(Date, DateTools.Resolution)} + + a string in format yyyyMMddHHmmssSSS or shorter, + depending on resolution; using GMT as timezone + + + + Converts a millisecond time to a string suitable for indexing. + + + the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT + + the desired resolution, see + {@link #Round(long, DateTools.Resolution)} + + a string in format yyyyMMddHHmmssSSS or shorter, + depending on resolution; using GMT as timezone + + + + Converts a string produced by timeToString or + DateToString back to a time, represented as the + number of milliseconds since January 1, 1970, 00:00:00 GMT. + + + the date string to be converted + + the number of milliseconds since January 1, 1970, 00:00:00 GMT + + ParseException if dateString is not in the + expected format + + + + Converts a string produced by timeToString or + DateToString back to a time, represented as a + Date object. + + + the date string to be converted + + the parsed time as a Date object + + ParseException if dateString is not in the + expected format + + + + Limit a date's resolution. For example, the date 2004-09-21 13:50:11 + will be changed to 2004-09-01 00:00:00 when using + Resolution.MONTH. + + + The desired resolution of the date to be returned + + the date with all values more precise than resolution + set to 0 or 1 + + + + Limit a date's resolution. For example, the date 1095767411000 + (which represents 2004-09-21 13:50:11) will be changed to + 1093989600000 (2004-09-01 00:00:00) when using + Resolution.MONTH. + + + The time in milliseconds (not ticks). + The desired resolution of the date to be returned + + the date with all values more precise than resolution + set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT + + + + Specifies the time granularity. + + + A variety of high efficiencly bit twiddling routines. + + + $Id$ + + + + Returns the number of bits set in the long + + + Returns the number of set bits in an array of longs. + + + Returns the popcount or cardinality of the two sets after an intersection. + Neither array is modified. + + + + Returns the popcount or cardinality of the union of two sets. + Neither array is modified. + + + + Returns the popcount or cardinality of A & ~B + Neither array is modified. + + + + table of number of trailing zeros in a byte + + + Returns number of trailing zeros in a 64 bit long value. + + + Returns number of trailing zeros in a 32 bit int value. + + + returns 0 based index of first set bit + (only works for x!=0) +
This is an alternate implementation of ntz() +
+
+ + returns 0 based index of first set bit +
This is an alternate implementation of ntz() +
+
+ + returns true if v is a power of two or zero + + + returns true if v is a power of two or zero + + + returns the next highest power of two, or the current value if it's already a power of two or zero + + + returns the next highest power of two, or the current value if it's already a power of two or zero + + + A {@link Collector} that sorts by {@link SortField} using + {@link FieldComparator}s. +

+ See the {@link #create(Lucene.Net.Search.Sort, int, boolean, boolean, boolean, boolean)} method + for instantiating a TopFieldCollector. + +

NOTE: This API is experimental and might change in + incompatible ways in the next release.

+

+
+ + Creates a new {@link TopFieldCollector} from the given + arguments. + +

NOTE: The instances returned by this method + pre-allocate a full array of length + numHits. + +

+ the sort criteria (SortFields). + + the number of results to collect. + + specifies whether the actual field values should be returned on + the results (FieldDoc). + + specifies whether document scores should be tracked and set on the + results. Note that if set to false, then the results' scores will + be set to Float.NaN. Setting this to true affects performance, as + it incurs the score computation on each competitive result. + Therefore if document scores are not required by the application, + it is recommended to set it to false. + + specifies whether the query's maxScore should be tracked and set + on the resulting {@link TopDocs}. Note that if set to false, + {@link TopDocs#GetMaxScore()} returns Float.NaN. Setting this to + true affects performance as it incurs the score computation on + each result. Also, setting this true automatically sets + trackDocScores to true as well. + + specifies whether documents are scored in doc Id order or not by + the given {@link Scorer} in {@link #SetScorer(Scorer)}. + + a {@link TopFieldCollector} instance which will sort the results by + the sort criteria. + + IOException +
+ +
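+ For example (sketch, 2.9-era API; the argument order follows the parameter list above, and searcher/query are assumed to exist):
+
+            // requires: using Lucene.Net.Search;
+            Sort sort = new Sort(new SortField("price", SortField.INT));
+            TopFieldCollector collector = TopFieldCollector.Create(
+                sort, 10,
+                true,     // fillFields: return the sort-field values with each FieldDoc
+                true,     // trackDocScores
+                false,    // trackMaxScore (TopDocs.GetMaxScore() will be NaN)
+                true);    // docsScoredInOrder
+            searcher.Search(query, collector);
+            TopDocs hits = collector.TopDocs();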

Wrapper to allow {@link SpanQuery} objects to participate in composite + single-field SpanQueries by 'lying' about their search field. That is, + the masked SpanQuery will function as normal, + but {@link SpanQuery#GetField()} simply hands back the value supplied + in this class's constructor.

+ +

This can be used to support Queries like {@link SpanNearQuery} or + {@link SpanOrQuery} across different fields, which is not ordinarily + permitted.

+ +

This can be useful for denormalized relational data: for example, when + indexing a document with conceptually many 'children':

+ +

+            teacherid: 1
+            studentfirstname: james
+            studentsurname: jones
+            
+            teacherid: 2
+            studentfirstname: james
+            studentsurname: smith
+            studentfirstname: sally
+            studentsurname: jones
+            
+ +

a SpanNearQuery with a slop of 0 can be applied across two + {@link SpanTermQuery} objects as follows: +

+            SpanQuery q1  = new SpanTermQuery(new Term("studentfirstname", "james"));
+            SpanQuery q2  = new SpanTermQuery(new Term("studentsurname", "jones"));
+            SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
+            Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);
+            
+ to search for 'studentfirstname:james studentsurname:jones' and find + teacherid 1 without matching teacherid 2 (which has a 'james' in position 0 + and 'jones' in position 1).

+ +

Note: as {@link #GetField()} returns the masked field, scoring will be + done using the norms of the field name supplied. This may lead to unexpected + scoring behaviour.

+

+
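+ Putting the fragment above into a runnable form (sketch, 2.9-era API; the searcher and the denormalized index are assumed to exist):
+
+            // requires: using Lucene.Net.Index; using Lucene.Net.Search;
+            //           using Lucene.Net.Search.Spans;
+            SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james"));
+            SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones"));
+            // pretend q2 was issued against "studentfirstname" so both clauses share one field
+            SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
+            Query q = new SpanNearQuery(new SpanQuery[] { q1, q2m }, -1, false);
+            TopDocs hits = searcher.Search(q, 10);   // matches teacherid 1 but not teacherid 2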
+ + use {@link #ExtractTerms(Set)} instead. + + + + Constrains search results to only match those which also match a provided + query. Also provides position information about where each document matches + at the cost of extra space compared with the QueryWrapperFilter. + There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. Namely, + the position information for each matching document is stored. +

+ This filter does not cache. See the {@link Lucene.Net.Search.CachingSpanFilter} for a wrapper that + caches. + + +

+ $Id:$ + +
+ + Constructs a filter which only matches documents matching + query. + + The {@link Lucene.Net.Search.Spans.SpanQuery} to use as the basis for the Filter. + + + + Abstract base class for sorting hits returned by a Query. + +

+ This class should only be used if the other SortField types (SCORE, DOC, + STRING, INT, FLOAT) do not provide an adequate sorting. It maintains an + internal cache of values which could be quite large. The cache is an array of + Comparable, one for each document in the index. There is a distinct + Comparable for each unique term in the field - if some documents have the + same term in the field, the cache array will have entries which reference the + same Comparable. + + This class will be used as part of a key to a FieldCache value. You must + implement hashCode and equals to avoid an explosion in RAM usage if you use + instances that are not the same instance. If you are searching using the + Remote contrib, the same instance of this class on the client will be a new + instance on every call to the server, so hashCode/equals is very important in + that situation. + +

+ Created: Apr 21, 2004 5:08:38 PM + + +

+ $Id: SortComparator.java 800119 2009-08-02 17:59:21Z markrmiller $ + + 1.4 + + Please use {@link FieldComparatorSource} instead. + +
+ + Expert: returns a comparator for sorting ScoreDocs. + +

+ Created: Apr 21, 2004 3:49:28 PM + + This class will be used as part of a key to a FieldCache value. You must + implement hashCode and equals to avoid an explosion in RAM usage if you use + instances that are not the same instance. If you are searching using the + Remote contrib, the same instance of this class on the client will be a new + instance on every call to the server, so hashCode/equals is very important in + that situation. + +

+ $Id: SortComparatorSource.java 747019 2009-02-23 13:59:50Z + mikemccand $ + + 1.4 + + Please use {@link FieldComparatorSource} instead. + +
+ + Creates a comparator for the field in the given index. + Index to create comparator for. + + Name of the field to create comparator for. + + Comparator of ScoreDoc objects. + + IOException If an error occurs reading the index. + + + Returns an object which, when sorted according to natural order, + will order the Term values in the correct order. +

For example, if the Terms contained integer values, this method + would return new Integer(termtext). Note that this + might not always be the most efficient implementation - for this + particular example, a better implementation might be to make a + ScoreDocLookupComparator that uses an internal lookup table of int. +

+ The textual value of the term. + + An object representing termtext that sorts according to the natural order of termtext. + + + + + +
+ + MultiPhraseQuery is a generalized version of PhraseQuery, with an added + method {@link #Add(Term[])}. + To use this class, to search for the phrase "Microsoft app*" first use + add(Term) on the term "Microsoft", then find all terms that have "app" as + prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[] + terms) to add them to the query. + + + 1.0 + + + + Sets the phrase slop for this query. + + + + + Sets the phrase slop for this query. + + + + + Add a single term at the next position in the phrase. + + + + + Add multiple terms at the next position in the phrase. Any of the terms + may match. + + + + + + + Allows to specify the relative position of terms within the phrase. + + + + + + + + + + + Returns a List<Term[]> of the terms in the multiphrase. + Do not modify the List or its contents. + + + + Returns the relative positions of terms in this phrase. + + + Prints a user-readable version of this query. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Wrapper for ({@link HitCollector}) implementations, which simply re-bases the + incoming docID before calling {@link HitCollector#collect}. + + + Please migrate custom HitCollectors to the new {@link Collector} + class. This class will be removed when {@link HitCollector} is + removed. + + + + use {@link #Score(Collector, int, int)} instead. + + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Score(Collector)} instead. + + + + use {@link #Advance(int)} instead. + + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + A simple hash table of document scores within a range. + + + A {@link IndexDeletionPolicy} that wraps around any other + {@link IndexDeletionPolicy} and adds the ability to hold and + later release a single "snapshot" of an index. While + the snapshot is held, the {@link IndexWriter} will not + remove any files associated with it even if the index is + otherwise being actively, arbitrarily changed. Because + we wrap another arbitrary {@link IndexDeletionPolicy}, this + gives you the freedom to continue using whatever {@link + IndexDeletionPolicy} you would normally want to use with your + index. Note that you can re-use a single instance of + SnapshotDeletionPolicy across multiple writers as long + as they are against the same index Directory. Any + snapshot held when a writer is closed will "survive" + when the next writer is opened. + +

WARNING: This API is new and experimental and + may suddenly change.

+

+
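+ A backup sketch (2.9-era API; member names such as Snapshot, Release and GetFileNames are assumed from the Java original, and dir/analyzer are placeholders):
+
+            // requires: using Lucene.Net.Index;
+            SnapshotDeletionPolicy snapshotter =
+                new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+            IndexWriter writer = new IndexWriter(dir, analyzer, snapshotter,
+                                                 IndexWriter.MaxFieldLength.UNLIMITED);
+            // ... add documents, commit ...
+            try
+            {
+                IndexCommit commit = (IndexCommit) snapshotter.Snapshot();
+                foreach (string fileName in commit.GetFileNames())
+                {
+                    // copy fileName out of the index directory here
+                }
+            }
+            finally
+            {
+                snapshotter.Release();   // allow the snapshotted files to be deleted again
+            }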
+ + Take a snapshot of the most recent commit to the + index. You must call release() to free this snapshot. + Note that while the snapshot is held, the files it + references will not be deleted, which will consume + additional disk space in your index. If you take a + snapshot at a particularly bad time (say just before + you call optimize()) then in the worst case this could + consume an extra 1X of your total index size, until + you release the snapshot. + + + + Release the currently held snapshot. + + + + The number of the field this vector is associated with + + + + Used by DocumentsWriter to maintain per-thread state. + We keep a separate Posting hash and other state for each + thread and then merge postings hashes from all threads + when writing the segment. + + + + Holds state for inverting all occurrences of a single + field in the document. This class doesn't do anything + itself; instead, it forwards the tokens produced by + analysis to its own consumer + (InvertedDocConsumerPerField). It also interacts with an + endConsumer (InvertedDocEndConsumerPerField). + + + + Processes all occurrences of a single field + + + Process the document. If there is + something for this document to be done in docID order, + you should encapsulate that as a + DocumentsWriter.DocWriter and return it. + DocumentsWriter then calls finish() on this object + when it's its turn. + + + + A {@link MergeScheduler} that runs each merge using a + separate thread, up until a maximum number of threads + ({@link #setMaxThreadCount}) at which when a merge is + needed, the thread(s) that are updating the index will + pause until one or more merges completes. This is a + simple way to use concurrency in the indexing process + without having to create and manage application level + threads. + + + + Sets the max # simultaneous threads that may be + running. If a merge is necessary yet we already have + this many threads running, the incoming thread (that + is calling add/updateDocument) will block until + a merge thread has completed. + + + + Get the max # simultaneous threads that may be + + + + + Return the priority that merge threads run at. By + default the priority is 1 plus the priority of (ie, + slightly higher priority than) the first thread that + calls merge. + + + + Return the priority that merge threads run at. + + + Does the actual merge, by calling {@link IndexWriter#merge} + + + Create and return a new MergeThread + + + Called when an exception is hit in a background merge + thread + + + + Used for testing + + + Used for testing + + + Used for testing + + + Used for testing + + + Used for testing + + + Declare what fields to load normally and what fields to load lazily + + + + + + Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. If both are null, the + Document will not have any {@link Field} on it. + + A Set of {@link String} field names to load. May be empty, but not null + + A Set of {@link String} field names to load lazily. May be empty, but not null + + + + Indicate whether to load the field with the given name or not. If the {@link Field#Name()} is not in either of the + initializing Sets, then {@link Lucene.Net.Documents.FieldSelectorResult#NO_LOAD} is returned. If a Field name + is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. + + + The {@link Field} name to check + + The {@link FieldSelectorResult} + + + + The term text of a Token. + + + Returns the Token's term text. 
+ + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use {@link #TermBuffer()} and {@link + #TermLength()} directly instead. If you really need a + String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use {@link + #ResizeTermBuffer(int)} to increase it. After + altering the buffer be sure to call {@link + #setTermLength} to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + {@link #SetTermBuffer(char[], int, int)}, + {@link #SetTermBuffer(String)}, or + {@link #SetTermBuffer(String, int, int)} + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Allocates a buffer char[] of at least newSize, without preserving the existing content. + its always used in places that set the content + + minimum size of the buffer + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use {@link #ResizeTermBuffer(int)} first. + + the truncated length + + + + + Not implemented. Waiting for volunteers. + + + + + Not implemented. Waiting for volunteers. + + + + Simple standalone tool that forever acquires & releases a + lock using a specific LockFactory. Run without any args + to see usage. + + + + + + + + + A {@link HitCollector} implementation that collects the top-sorting + documents, returning them as a {@link TopFieldDocs}. This is used by {@link + IndexSearcher} to implement {@link TopFieldDocs}-based search. + +

This may be extended, overriding the collect method to, e.g., + conditionally invoke super() in order to filter which + documents are collected. + +

+ Please use {@link TopFieldCollector} instead. + +
+ + A {@link HitCollector} implementation that collects the top-scoring + documents, returning them as a {@link TopDocs}. This is used by {@link + IndexSearcher} to implement {@link TopDocs}-based search. + +

This may be extended, overriding the collect method to, e.g., + conditionally invoke super() in order to filter which + documents are collected. + +

+ Please use {@link TopScoreDocCollector} + instead, which has better performance. + + +
+ + The total number of hits the collector encountered. + + + The priority queue which holds the top-scoring documents. + + + Construct to collect a given number of hits. + the maximum number of hits to collect + + + + use TopDocCollector(hq) instead. numHits is not used by this + constructor. It will be removed in a future release. + + + + Constructor to collect the top-scoring documents by using the given PQ. + the PQ to use by this instance. + + + + The total number of documents that matched this query. + + + The top-scoring hits. + + + Construct to collect a given number of hits. + the index to be searched + + the sort criteria + + the maximum number of hits to collect + + + + Expert: + Public for extension only + + + + Matches spans near the beginning of a field. + + + Construct a SpanFirstQuery matching spans in match whose end + position is less than or equal to end. + + + + Return the SpanQuery whose matches are filtered. + + + Return the maximum end position permitted in a match. + + + Returns a collection of all terms matched by this query. + use extractTerms instead + + + + + + A query that matches all documents. + + + + + Field used for normalization factor (document boost). Null if nothing. + + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + An alternative to BooleanScorer that also allows a minimum number + of optional scorers that should match. +
Implements skipTo(), and has no limitations on the numbers of added scorers. +
Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. +
+
+ + The scorer to which all scoring will be delegated, + except for computing and using the coordination factor. + + + + The number of optionalScorers that need to match (if there are any) + + + Creates a {@link Scorer} with the given similarity and lists of required, + prohibited and optional scorers. In no required scorers are added, at least + one of the optional scorers will have to match during the search. + + + The similarity to be used. + + The minimum number of optional added scorers that should match + during the search. In case no required scorers are added, at least + one of the optional scorers will have to match during the search. + + the list of required scorers. + + the list of prohibited scorers. + + the list of optional scorers. + + + + Returns the scorer to be used for match counting and score summing. + Uses requiredScorers, optionalScorers and prohibitedScorers. + + + + Returns the scorer to be used for match counting and score summing. + Uses the given required scorer and the prohibitedScorers. + + A required scorer already built. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed through + {@link HitCollector#Collect(int, float)}. +
When this method is used the {@link #Explain(int)} method should not be used. + + use {@link #Score(Collector)} instead. + +
+ + Scores and collects all matching documents. + The collector to which all matching documents are passed through. +
When this method is used the {@link #Explain(int)} method should not be used. + +
+ + Expert: Collects matching documents in a range. +
Note that {@link #Next()} must be called once before this method is + called for the first time. +
+ The collector to which all matching documents are passed through + {@link HitCollector#Collect(int, float)}. + + Do not score documents past this. + + true if more matching documents may remain. + + use {@link #Score(Collector, int, int)} instead. + +
+ + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + Throws an UnsupportedOperationException. + TODO: Implement an explanation of the coordination factor. + + The document number for the explanation. + + UnsupportedOperationException + + + A Scorer for OR like queries, counterpart of ConjunctionScorer. + This Scorer implements {@link Scorer#SkipTo(int)} and uses skipTo() on the given Scorers. + TODO: Implement score(HitCollector, int). + + + + The number of subscorers. + + + The subscorers. + + + The minimum number of scorers that should match. + + + The scorerDocQueue contains all subscorers ordered by their current doc(), + with the minimum at the top. +
The scorerDocQueue is initialized the first time next() or skipTo() is called. +
An exhausted scorer is immediately removed from the scorerDocQueue. +
If less than the minimumNrMatchers scorers + remain in the scorerDocQueue next() and skipTo() return false. +

+ After each to call to next() or skipTo() + currentSumScore is the total score of the current matching doc, + nrMatchers is the number of matching scorers, + and all scorers are after the matching doc, or are exhausted. +

+
+ + The document number of the current match. + + + The number of subscorers that provide the current match. + + + Construct a DisjunctionScorer. + A collection of at least two subscorers. + + The positive minimum number of subscorers that should + match to match this query. +
When minimumNrMatchers is bigger than + the number of subScorers, + no matches will be produced. +
+ When minimumNrMatchers equals the number of subScorers, + it is more efficient to use ConjunctionScorer. +
+ + Construct a DisjunctionScorer, using one as the minimum number + of matching subscorers. + + + + Called the first time next() or skipTo() is called to + initialize scorerDocQueue. + + + + Scores and collects all matching documents. + The collector to which all matching documents are passed through + {@link HitCollector#Collect(int, float)}. +
When this method is used the {@link #Explain(int)} method should not be used. + + use {@link #Score(Collector)} instead. + +
+ + Scores and collects all matching documents. + The collector to which all matching documents are passed through. +
When this method is used the {@link #Explain(int)} method should not be used. + +
+ + Expert: Collects matching documents in a range. Hook for optimization. + Note that {@link #Next()} must be called once before this method is called + for the first time. + + The collector to which all matching documents are passed through + {@link HitCollector#Collect(int, float)}. + + Do not score documents past this. + + true if more matching documents may remain. + + use {@link #Score(Collector, int, int)} instead. + + + + Expert: Collects matching documents in a range. Hook for optimization. + Note that {@link #Next()} must be called once before this method is called + for the first time. + + The collector to which all matching documents are passed through. + + Do not score documents past this. + + true if more matching documents may remain. + + + + use {@link #NextDoc()} instead. + + + + Advance all subscorers after the current document determined by the + top of the scorerDocQueue. + Repeat until at least the minimum number of subscorers match on the same + document and all subscorers are after that document or are exhausted. +
On entry the scorerDocQueue has at least minimumNrMatchers + available. At least the scorer with the minimum document number will be advanced. +
+ true iff there is a match. +
In case there is a match, currentDoc, currentSumScore, + and nrMatchers describe the match. + + TODO: Investigate whether it is possible to use skipTo() when + the minimum number of matchers is bigger than one, ie. try and use the + character of ConjunctionScorer for the minimum number of matchers. + Also delay calling score() on the sub scorers until the minimum number of + matchers is reached. +
For this, a Scorer array with minimumNrMatchers elements might + hold Scorers at currentDoc that are temporarily popped from scorerQueue. +
+
+ + Returns the score of the current document matching the query. + Initially invalid, until {@link #Next()} is called the first time. + + + + use {@link #DocID()} instead. + + + + Returns the number of subscorers matching the current document. + Initially invalid, until {@link #Next()} is called the first time. + + + + Skips to the first match beyond the current whose document number is + greater than or equal to a given target.
+ When this method is used the {@link #Explain(int)} method should not be + used.
+ The implementation uses the skipTo() method on the subscorers. + +
+ The target document number. + + true iff there is such a match. + + use {@link #Advance(int)} instead. + +
+ + Advances to the first match beyond the current whose document number is + greater than or equal to a given target.
+ When this method is used the {@link #Explain(int)} method should not be + used.
+ The implementation uses the skipTo() method on the subscorers. + +
+ The target document number. + + the document whose number is greater than or equal to the given + target, or -1 if none exist. + +
+ + An explanation for the score of a given document. + + + + Scorer for conjunctions, sets of queries, all of which are required. + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + Count a scorer as a single match. + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + Token Manager. + + + Debug output. + + + Set debug output. + + + Token literal values. + + + Lexer state names. + + + Lex State array. + + + Constructor. + + + Constructor. + + + Reinitialise parser. + + + Reinitialise parser. + + + Switch to specified lex state. + + + Get the next Token. + + + Interface that exceptions should implement to support lazy loading of messages. + + For Native Language Support (NLS), system of software internationalization. + + This Interface should be implemented by all exceptions that require + translation + + + + + a instance of a class that implements the Message interface + + + + $Id: TermVectorsReader.java 687046 2008-08-19 13:01:11Z mikemccand $ + + + + Retrieve the length (in bytes) of the tvd and tvf + entries for the next numDocs starting with + startDocID. This is used for bulk copying when + merging segments, if the field numbers are + congruent. Once this returns, the tvf & tvd streams + are seeked to the startDocID. + + + + + The number of documents in the reader + + + + Retrieve the term vector for the given document and field + The document number to retrieve the vector for + + The field within the document to retrieve + + The TermFreqVector for the document and field or null if there is no termVector for this field. + + IOException if there is an error reading the term vector files + + + Return all term vectors stored for this document or null if the could not be read in. + + + The document number to retrieve the vector for + + All term frequency vectors + + IOException if there is an error reading the term vector files + + + + The field to read in + + The pointer within the tvf file where we should start reading + + The mapper used to map the TermVector + + IOException + + + Models the existing parallel array structure + + + Construct the vector + The {@link TermFreqVector} based on the mappings. + + + + A Payload is metadata that can be stored together with each occurrence + of a term. This metadata is stored inline in the posting list of the + specific term. +

+ To store payloads in the index a {@link TokenStream} has to be used that + produces payload data. +

+ Use {@link TermPositions#GetPayloadLength()} and {@link TermPositions#GetPayload(byte[], int)} + to retrieve the payloads from the index.
+ +

+
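+ For example (sketch, 2.9-era API; member names such as Length and ToByteArray are assumed from the Java original):
+
+            // requires: using Lucene.Net.Index;
+            byte[] meta = System.Text.Encoding.UTF8.GetBytes("weight=7");
+            Payload payload = new Payload(meta);     // the array is referenced, not copied
+            int length = payload.Length();           // length of the payload data
+            byte[] copy = payload.ToByteArray();     // defensive copy of the bytes
+
+ A custom {@link TokenStream} would attach such a payload to its tokens so that it is written into the posting list.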
+ + the byte array containing the payload data + + + the offset within the byte array + + + the length of the payload data + + + Creates an empty payload and does not allocate a byte array. + + + Creates a new payload with the the given array as data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + the data of this payload + + + + Creates a new payload with the the given array as data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + the data of this payload + + the offset in the data byte array + + the length of the data + + + + Sets this payloads data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + + Sets this payloads data. + A reference to the passed-in array is held, i. e. no + copy is made. + + + + Returns a reference to the underlying byte array + that holds this payloads data. + + + + Returns the offset in the underlying byte array + + + Returns the length of the payload data. + + + Returns the byte at the given index. + + + Allocates a new byte array, copies the payload data into it and returns it. + + + Copies the payload data to a byte array. + + + the target byte array + + the offset in the target byte array + + + + Clones this payload by creating a copy of the underlying + byte array. + + + + NOTE: this API is experimental and will likely change + + + Adds a new doc in this term. If this returns null + then we just skip consuming positions/payloads. + + + + Called when we are done adding docs to this term + + + Expert: A Directory instance that switches files between + two other Directory instances. +

Files with the specified extensions are placed in the + primary directory; others are placed in the secondary + directory. The provided Set must not change once passed + to this class, and must allow multiple threads to call + contains at once.

+ +

NOTE: this API is new and experimental and is + subject to sudden change in the next release. +

+
+ + Return the primary directory + + + Return the secondary directory + + + Utility method to return a file's extension. + + + Writes bytes through to a primary IndexOutput, computing + checksum as it goes. Note that you cannot use seek(). + + + + Represents hits returned by {@link + * Searcher#search(Query,Filter,int)} and {@link + * Searcher#search(Query,int) + + + + The total number of hits for the query. + + + + + The top hits for the query. + + + Stores the maximum score value encountered, needed for normalizing. + + + Returns the maximum score value encountered. Note that in case + scores are not tracked, this returns {@link Float#NaN}. + + + + Sets the maximum score value encountered. + + + Constructs a TopDocs with a default maxScore=Float.NaN. + + + + + + Matches spans containing a term. + + + Construct a SpanTermQuery matching the named term's spans. + + + Return the term whose spans are matched. + + + Returns a collection of all terms matched by this query. + use extractTerms instead + + + + + + Constrains search results to only match those which also match a provided + query. + +

This could be used, for example, with a {@link TermRangeQuery} on a suitably + formatted date field to implement date filtering. One could re-use a single + QueryFilter that matches, e.g., only documents modified within the last + week. The QueryFilter and TermRangeQuery would only need to be reconstructed + once per day. + +

+ $Id:$ + +
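+ A hedged sketch of the date-filtering pattern described above. The field name
+ "modified", the yyyyMMdd encoding of its values and the searcher variable are
+ assumptions for the example, not part of the API.
+
+            using Lucene.Net.Index;
+            using Lucene.Net.Search;
+
+            // Inside some method, given an existing IndexSearcher named searcher:
+            Filter lastWeek = new QueryFilter(
+                new TermRangeQuery("modified", "20091201", "20091208", true, true));
+
+            Query userQuery = new TermQuery(new Term("contents", "lucene"));
+            TopDocs hits = searcher.Search(userQuery, lastWeek, 10);
+
+ Because the filter object is reusable, the wrapped TermRangeQuery only needs to be
+ rebuilt when the date window changes (e.g. once per day).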
+ + Constructs a filter which only matches documents matching + query. + + + + Use {@link #GetDocIdSet(IndexReader)} instead. + + + + Expert: obtains single byte field values from the + {@link Lucene.Net.Search.FieldCache FieldCache} + using getBytes() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements" + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if {@link #getValues} is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API. Alternatively, for + a short-term fix, you could wrap your ValueSource using + {@link MultiValueSource}, which costs more CPU per lookup + but will not consume double the FieldCache RAM.

+ + + +

Create a cached byte field source with default string-to-byte parser. +
+ + Create a cached byte field source with a specific string-to-byte parser. + + + A Query that matches documents matching boolean combinations of other + queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other + BooleanQuerys. + + + + Return the maximum number of clauses permitted, 1024 by default. + Attempts to add more than the permitted number of clauses cause {@link + TooManyClauses} to be thrown. + + + + + + Set the maximum number of clauses permitted per BooleanQuery. + Default value is 1024. + + + + Constructs an empty boolean query. + + + Constructs an empty boolean query. + + {@link Similarity#Coord(int,int)} may be disabled in scoring, as + appropriate. For example, this score factor does not make sense for most + automatically generated queries, like {@link WildcardQuery} and {@link + FuzzyQuery}. + + + disables {@link Similarity#Coord(int,int)} in scoring. + + + + Returns true iff {@link Similarity#Coord(int,int)} is disabled in + scoring for this query instance. + + + + + + Specifies a minimum number of the optional BooleanClauses + which must be satisfied. + +

+ By default no optional clauses are necessary for a match + (unless there are no required clauses). If this method is used, + then the specified number of clauses is required. +

+

+ Use of this method is totally independent of specifying that + any specific clauses are required (or prohibited). This number will + only be compared against the number of matching optional clauses. +

+

+ EXPERT NOTE: Using this method may force collecting docs in order, + regardless of whether setAllowDocsOutOfOrder(true) has been called. +

+ +

+ the number of optional clauses that must match + + + +
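+ A short sketch (field and term values are illustrative): three optional clauses of
+ which at least two must match.
+
+            using Lucene.Net.Index;
+            using Lucene.Net.Search;
+
+            // Inside some method:
+            BooleanQuery query = new BooleanQuery();
+            query.Add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.SHOULD);
+            query.Add(new TermQuery(new Term("body", "search")), BooleanClause.Occur.SHOULD);
+            query.Add(new TermQuery(new Term("body", "index")), BooleanClause.Occur.SHOULD);
+
+            // A document must now satisfy at least two of the optional clauses to match.
+            query.SetMinimumNumberShouldMatch(2);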
+ + Gets the minimum number of the optional BooleanClauses + which must be satisfied. + + + + Adds a clause to a boolean query. + + + TooManyClauses if the new number of clauses exceeds the maximum clause number + + + + + Adds a clause to a boolean query. + TooManyClauses if the new number of clauses exceeds the maximum clause number + + + + + Returns the set of clauses in this query. + + + Returns the list of clauses in this query. + + + Whether hit docs may be collected out of docid order. + + + this will not be needed anymore, as + {@link Weight#ScoresDocsOutOfOrder()} is used. + + + + Expert: Indicates whether hit docs may be collected out of docid order. + +

+ Background: although the contract of the Scorer class requires that + documents be iterated in order of doc id, this was not true in early + versions of Lucene. Many pieces of functionality in the current Lucene code + base have undefined behavior if this contract is not upheld, but in some + specific simple cases may be faster. (For example: disjunction queries with + fewer than 32 prohibited clauses; this setting has no effect for other + queries.) +

+ +

+ Specifics: By setting this option to true, docid N might be scored for a + single segment before docid N-1. Across multiple segments, docs may be + scored out of order regardless of this setting - it only applies to scoring + a single segment. + + Being static, this setting is system wide. +

+ +

+ this is not needed anymore, as + {@link Weight#ScoresDocsOutOfOrder()} is used. + +
+ + Whether hit docs may be collected out of docid order. + + + + + this is not needed anymore, as + {@link Weight#ScoresDocsOutOfOrder()} is used. + + + + Use {@link #SetAllowDocsOutOfOrder(boolean)} instead. + + + + Use {@link #GetAllowDocsOutOfOrder()} instead. + + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Expert: Delegating scoring implementation. Useful in {@link + Query#GetSimilarity(Searcher)} implementations, to override only certain + methods of a Searcher's Similarity implementation. + + + + Expert: Scoring API. +

Subclasses implement search scoring. + +

+ The score of query q for document d correlates to the
+ cosine-distance or dot-product between document and query vectors in a
+ Vector Space Model (VSM) of Information Retrieval.
+ A document whose vector is closer to the query vector in that model is scored higher.
+ The score is computed as follows:
+
+            score(q,d)  =  coord(q,d) · queryNorm(q) · Σ over t in q of ( tf(t in d) · idf(t)² · t.getBoost() · norm(t,d) )
+
+ where
+
+   1. tf(t in d) correlates to the term's frequency,
+      defined as the number of times term t appears in the currently scored document d.
+      Documents that have more occurrences of a given term receive a higher score.
+      The default computation for tf(t in d) in
+      {@link Lucene.Net.Search.DefaultSimilarity#Tf(float) DefaultSimilarity} is:
+
+            {@link Lucene.Net.Search.DefaultSimilarity#Tf(float) tf(t in d)}  =  frequency½
+
+   2. idf(t) stands for Inverse Document Frequency. This value
+      correlates to the inverse of docFreq
+      (the number of documents in which the term t appears).
+      This means rarer terms give higher contribution to the total score.
+      The default computation for idf(t) in
+      {@link Lucene.Net.Search.DefaultSimilarity#Idf(int, int) DefaultSimilarity} is:
+
+            {@link Lucene.Net.Search.DefaultSimilarity#Idf(int, int) idf(t)}  =  1 + log( numDocs / (docFreq + 1) )
+
+   3. coord(q,d) is a score factor based on how many of the query terms are found in the
+      specified document. Typically, a document that contains more of the query's terms
+      will receive a higher score than another document with fewer query terms.
+      This is a search time factor computed in {@link #Coord(int, int) coord(q,d)}
+      by the Similarity in effect at search time.
+
+   4. queryNorm(q) is a normalizing factor used to make scores between queries comparable.
+      This factor does not affect document ranking (since all ranked documents are
+      multiplied by the same factor), but rather just attempts to make scores from
+      different queries (or even different indexes) comparable.
+      This is a search time factor computed by the Similarity in effect at search time.
+      The default computation in
+      {@link Lucene.Net.Search.DefaultSimilarity#QueryNorm(float) DefaultSimilarity} is:
+
+            queryNorm(q)  =  {@link Lucene.Net.Search.DefaultSimilarity#QueryNorm(float) queryNorm(sumOfSquaredWeights)}  =  1 / sumOfSquaredWeights½
+
+      The sum of squared weights (of the query terms) is
+      computed by the query {@link Lucene.Net.Search.Weight} object.
+      For example, a {@link Lucene.Net.Search.BooleanQuery boolean query}
+      computes this value as:
+
+            {@link Lucene.Net.Search.Weight#SumOfSquaredWeights() sumOfSquaredWeights}  =  {@link Lucene.Net.Search.Query#GetBoost() q.getBoost()}² · Σ over t in q of ( idf(t) · t.getBoost() )²
+
+   5. t.getBoost() is a search time boost of term t in the query q as
+      specified in the query text (see query syntax), or as set by application calls to
+      {@link Lucene.Net.Search.Query#SetBoost(float) setBoost()}.
+      Notice that there is really no direct API for accessing a boost of one term in a
+      multi term query, but rather multi terms are represented in a query as multi
+      {@link Lucene.Net.Search.TermQuery TermQuery} objects,
+      and so the boost of a term in the query is accessible by calling the sub-query
+      {@link Lucene.Net.Search.Query#GetBoost() getBoost()}.
+
+   6. norm(t,d) encapsulates a few (indexing time) boost and length factors:
+
+      • Document boost - set by calling
+        {@link Lucene.Net.Documents.Document#SetBoost(float) doc.setBoost()}
+        before adding the document to the index.
+      • Field boost - set by calling
+        {@link Lucene.Net.Documents.Fieldable#SetBoost(float) field.setBoost()}
+        before adding the field to a document.
+      • {@link #LengthNorm(String, int) lengthNorm(field)} - computed
+        when the document is added to the index in accordance with the number of tokens
+        of this field in the document, so that shorter fields contribute more to the score.
+        LengthNorm is computed by the Similarity class in effect at indexing.
+
+      When a document is added to the index, all the above factors are multiplied.
+      If the document has multiple fields with the same name, all their boosts are
+      multiplied together:
+
+            norm(t,d)  =  {@link Lucene.Net.Documents.Document#GetBoost() doc.getBoost()} · {@link #LengthNorm(String, int) lengthNorm(field)} · Π over field f in d named as t of {@link Lucene.Net.Documents.Fieldable#GetBoost() f.getBoost}()
+
+      However the resulting norm value is {@link #EncodeNorm(float) encoded} as a single byte
+      before being stored. At search time, the norm byte value is read from the index
+      {@link Lucene.Net.Store.Directory directory} and
+      {@link #DecodeNorm(byte) decoded} back to a float norm value.
+      This encoding/decoding, while reducing index size, comes with the price of
+      precision loss - it is not guaranteed that decode(encode(x)) = x.
+      For instance, decode(encode(0.89)) = 0.75.
+      Also notice that search time is too late to modify this norm part of scoring, e.g. by
+      using a different {@link Similarity} for search.
+
+ + + + + + +
+ + Set the default Similarity implementation used by indexing and search + code. + + + + + + + + + Return the default Similarity implementation used by indexing and search + code. + +

This is initially an instance of {@link DefaultSimilarity}. + +

+ + + + +
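+ As an illustration of plugging in a custom Similarity, a minimal sketch; disabling
+ length normalization is only an example of an override, not a recommendation, and the
+ class name is hypothetical.
+
+            using Lucene.Net.Search;
+
+            // Ignores field length when computing index-time norms.
+            public class NoLengthNormSimilarity : DefaultSimilarity
+            {
+                public override float LengthNorm(string fieldName, int numTokens)
+                {
+                    return 1.0f;
+                }
+            }
+
+            // At application startup (inside a method); must be set consistently
+            // for both indexing and searching.
+            Similarity.SetDefault(new NoLengthNormSimilarity());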
+ + Cache of decoded bytes. + + + Decodes a normalization factor stored in an index. + + + + + Returns a table for decoding normalization bytes. + + + + + Compute the normalization value for a field, given the accumulated + state of term processing for this field (see {@link FieldInvertState}). + +

Implementations should calculate a float value based on the field + state and then return that value. + +

For backward compatibility this method by default calls + {@link #LengthNorm(String, int)} passing + {@link FieldInvertState#GetLength()} as the second argument, and + then multiplies this value by {@link FieldInvertState#GetBoost()}.

+ +

WARNING: This API is new and experimental and may + suddenly change.

+ +

+ field name + + current processing state for this field + + the calculated float norm + +
+ + Computes the normalization value for a field given the total number of + terms contained in a field. These values, together with field boosts, are + stored in an index and multiplied into scores for hits on each field by the + search code. +

Matches in longer fields are less precise, so implementations of this + method usually return smaller values when numTokens is large, + and larger values when numTokens is small. + +

Note that the return values are computed under + {@link Lucene.Net.Index.IndexWriter#AddDocument(Lucene.Net.Documents.Document)} + and then stored using + {@link #EncodeNorm(float)}. + Thus they have limited precision, and documents + must be re-indexed if this method is altered. + +

+ the name of the field + + the total number of tokens contained in fields named + fieldName of doc. + + a normalization factor for hits on this field of this document + + + + +
+ + Computes the normalization value for a query given the sum of the squared + weights of each of the query terms. This value is then multiplied into the + weight of each query term. +

This does not affect ranking, but rather just attempts to make scores + from different queries comparable. + +

+ the sum of the squares of query term weights + + a normalization factor for query weights + +
+ + Encodes a normalization factor for storage in an index. + +

The encoding uses a three-bit mantissa, a five-bit exponent, and + the zero-exponent point at 15, thus + representing values from around 7x10^9 to 2x10^-9 with about one + significant decimal digit of accuracy. Zero is also represented. + Negative numbers are rounded up to zero. Values too large to represent + are rounded down to the largest representable value. Positive values too + small to represent are rounded up to the smallest positive representable + value. + +

+ + + + +
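+ A tiny sketch of the lossy round trip described above (the exact decoded value
+ depends on the rounding steps of the encoding):
+
+            using Lucene.Net.Search;
+
+            // Inside some method:
+            byte encoded = Similarity.EncodeNorm(0.89f);
+            float decoded = Similarity.DecodeNorm(encoded);
+            // decoded is roughly 0.75f, not 0.89f: decode(encode(x)) != x in general.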
+ + Computes a score factor based on a term or phrase's frequency in a + document. This value is multiplied by the {@link #Idf(Term, Searcher)} + factor for each term in the query and these products are then summed to + form the initial score for a document. + +

Terms and phrases repeated in a document indicate the topic of the + document, so implementations of this method usually return larger values + when freq is large, and smaller values when freq + is small. + +

The default implementation calls {@link #Tf(float)}. + +

+ the frequency of a term within a document + + a score factor based on a term's within-document frequency + +
+ + Computes the amount of a sloppy phrase match, based on an edit distance. + This value is summed for each sloppy phrase match in a document to form + the frequency that is passed to {@link #Tf(float)}. + +

A phrase match with a small edit distance to a document passage more + closely matches the document, so implementations of this method usually + return larger values when the edit distance is small and smaller values + when it is large. + +

+ + + the edit distance of this sloppy phrase match + + the frequency increment for this match + +
+ + Computes a score factor based on a term or phrase's frequency in a + document. This value is multiplied by the {@link #Idf(Term, Searcher)} + factor for each term in the query and these products are then summed to + form the initial score for a document. + +

Terms and phrases repeated in a document indicate the topic of the + document, so implementations of this method usually return larger values + when freq is large, and smaller values when freq + is small. + +

+ the frequency of a term within a document + + a score factor based on a term's within-document frequency + +
+ + Computes a score factor for a simple term. + +

The default implementation is:

+            return idf(searcher.docFreq(term), searcher.maxDoc());
+            
+ + Note that {@link Searcher#MaxDoc()} is used instead of + {@link Lucene.Net.Index.IndexReader#NumDocs()} because it is proportional to + {@link Searcher#DocFreq(Term)} , i.e., when one is inaccurate, + so is the other, and in the same direction. + +
+ the term in question + + the document collection being searched + + a score factor for the term + + see {@link #IdfExplain(Term, Searcher)} + +
+ + Computes a score factor for a simple term and returns an explanation + for that score factor. + +

+ The default implementation uses: + +

+            idf(searcher.docFreq(term), searcher.maxDoc());
+            
+ + Note that {@link Searcher#MaxDoc()} is used instead of + {@link Lucene.Net.Index.IndexReader#NumDocs()} because it is + proportional to {@link Searcher#DocFreq(Term)} , i.e., when one is + inaccurate, so is the other, and in the same direction. + +
+ the term in question + + the document collection being searched + + an IDFExplain object that includes both an idf score factor + and an explanation for the term. + + IOException +
+ + Computes a score factor for a phrase. + +

The default implementation sums the {@link #Idf(Term,Searcher)} factor + for each term in the phrase. + +

+ the terms in the phrase + + the document collection being searched + + idf score factor + + see {@link #idfExplain(Collection, Searcher)} + +
+ + Computes a score factor for a phrase. + +

+ The default implementation sums the idf factor for + each term in the phrase. + +

+ the terms in the phrase + + the document collection being searched + + an IDFExplain object that includes both an idf + score factor for the phrase and an explanation + for each term. + + IOException +
+ + Computes a score factor based on a term's document frequency (the number + of documents which contain the term). This value is multiplied by the + {@link #Tf(int)} factor for each term in the query and these products are + then summed to form the initial score for a document. + +

Terms that occur in fewer documents are better indicators of topic, so + implementations of this method usually return larger values for rare terms, + and smaller values for common terms. + +

+ the number of documents which contain the term + + the total number of documents in the collection + + a score factor based on the term's document frequency + +
+ + Computes a score factor based on the fraction of all query terms that a + document contains. This value is multiplied into scores. + +

The presence of a large portion of the query terms indicates a better + match with the query, so implementations of this method usually return + larger values when the ratio between these parameters is large and smaller + values when the ratio between them is small. + +

+ the number of query terms matched in the document + + the total number of terms in the query + + a score factor based on term overlap with the query + +
+ + Calculate a scoring factor based on the data in the payload. Overriding implementations + are responsible for interpreting what is in the payload. Lucene makes no assumptions about + what is in the byte array. +

+ The default implementation returns 1. + +

+ The fieldName of the term this payload belongs to + + The payload byte array to be scored + + The offset into the payload array + + The length in the array + + An implementation dependent float to be used as a scoring factor + + + See {@link #ScorePayload(int, String, int, int, byte[], int, int)} + +
+ + Calculate a scoring factor based on the data in the payload. Overriding implementations + are responsible for interpreting what is in the payload. Lucene makes no assumptions about + what is in the byte array. +

+ The default implementation returns 1. + +

+ The docId currently being scored. If this value is {@link #NO_DOC_ID_PROVIDED}, then it should be assumed that the PayloadQuery implementation does not provide document information + + The fieldName of the term this payload belongs to + + The start position of the payload + + The end position of the payload + + The payload byte array to be scored + + The offset into the payload array + + The length in the array + + An implementation dependent float to be used as a scoring factor + + +
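+ A hedged sketch of overriding this hook in a Similarity subclass. The assumption that
+ each payload is a single byte holding a small boost is purely illustrative, as is the
+ class name.
+
+            using Lucene.Net.Search;
+
+            public class PayloadBoostSimilarity : DefaultSimilarity
+            {
+                public override float ScorePayload(int docId, string fieldName,
+                                                   int start, int end,
+                                                   byte[] payload, int offset, int length)
+                {
+                    if (payload == null || length == 0)
+                        return 1.0f; // same as the default behavior
+                    return 1.0f + payload[offset] / 255.0f;
+                }
+            }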
+ + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + The Similarity implementation used by default. + TODO: move back to top when old API is removed! + + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + Remove this when old API is removed! + + + + Construct a {@link Similarity} that delegates all methods to another. + + + the Similarity implementation to delegate to + + + + Thrown when an attempt is made to add more than {@link + #GetMaxClauseCount()} clauses. This typically happens if + a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery + is expanded to many terms during search. + + + + Expert: the Weight for BooleanQuery, used to + normalize, score and explain these queries. + +

NOTE: this API and implementation are subject to + change suddenly in the next release.

+

+
+ + The Similarity implementation. + + + MessageBundles classes extend this class, to implement a bundle. + + For Native Language Support (NLS), system of software internationalization. + + This interface is similar to the NLS class in eclipse.osgi.util.NLS class - + initializeMessages() method resets the values of all static strings, should + only be called by classes that extend from NLS (see TestMessages.java for + reference) - performs validation of all message in a bundle, at class load + time - performs per message validation at runtime - see NLSTest.java for + usage reference + + MessageBundle classes may subclass this type. + + + + Initialize a given class with the message bundle Keys Should be called from + a class that extends NLS in a static block at class load time. + + + Property file with that contains the message bundle + + where constants will reside + + + + + + + + + + + + + + - Message Key + + + + + Performs the priviliged action. + + A value that may represent the result of the action. + + + Add a complete document specified by all its term vectors. If document has no + term vectors, add value for tvx. + + + + + IOException + + + Do a bulk copy of numDocs documents from reader to our + streams. This is used to expedite merging, if the + field numbers are congruent. + + + + Close all streams. + + + A {@link MergeScheduler} that simply does each merge + sequentially, using the current thread. + + + + Just do the merges in sequence. We do this + "synchronized" so that even if the application is using + multiple threads, only one merge may run at a time. + + + + Used by DocumentsWriter to implemented a StringReader + that can be reset to a new string; we use this when + tokenizing the string value from a Field. + + + + Taps into DocInverter, as an InvertedDocEndConsumer, + which is called at the end of inverting each field. We + just look at the length for the field (docState.length) + and record the norm. + + + + This is a {@link LogMergePolicy} that measures size of a + segment as the number of documents (not taking deletions + into account). + + + + + + + + Sets the minimum size for the lowest level segments. + Any segments below this size are considered to be on + the same level (even if they vary drastically in size) + and will be merged whenever there are mergeFactor of + them. This effectively truncates the "long tail" of + small segments that would otherwise be created into a + single level. If you set this too large, it could + greatly increase the merging cost during indexing (if + you flush many small segments). + + + + Get the minimum size for a segment to remain + un-merged. + + + + + + For each Field, store a sorted collection of {@link TermVectorEntry}s +

+ This is not thread-safe. +

+
+ + + A Comparator for sorting {@link TermVectorEntry}s + + + + Get the mapping between fields and terms, sorted by the comparator + + + A map between field names and {@link java.util.SortedSet}s per field. SortedSet entries are {@link TermVectorEntry} + + + + Gathers all Fieldables for a document under the same + name, updates FieldInfos, and calls per-field consumers + to process field by field. + + Currently, only a single thread visits the fields, + sequentially, for processing. + + + + If there are fields we've seen but did not see again + in the last run, then free them up. + + + + Simple utility class providing static methods to + compress and decompress binary data for stored fields. + This class uses java.util.zip.Deflater and Inflater + classes to compress and decompress, which is the same + format previously used by the now deprecated + Field.Store.COMPRESS. + + + + Compresses the specified byte range using the + specified compressionLevel (constants are defined in + java.util.zip.Deflater). + + + + Compresses the specified byte range, with default BEST_COMPRESSION level + + + Compresses all bytes in the array, with default BEST_COMPRESSION level + + + Compresses the String value, with default BEST_COMPRESSION level + + + Compresses the String value using the specified + compressionLevel (constants are defined in + java.util.zip.Deflater). + + + + Decompress the byte array previously returned by + compress + + + + Decompress the byte array previously returned by + compressString back into a String + + + + LowerCaseTokenizer performs the function of LetterTokenizer + and LowerCaseFilter together. It divides text at non-letters and converts + them to lower case. While it is functionally equivalent to the combination + of LetterTokenizer and LowerCaseFilter, there is a performance advantage + to doing the two tasks at once, hence this (redundant) implementation. +

+ Note: this does a decent job for most European languages, but does a terrible + job for some Asian languages, where words are not separated by spaces. +

+
+ + A LetterTokenizer is a tokenizer that divides text at non-letters. That's + to say, it defines tokens as maximal strings of adjacent letters, as defined + by java.lang.Character.isLetter() predicate. + Note: this does a decent job for most European languages, but does a terrible + job for some Asian languages, where words are not separated by spaces. + + + + An abstract base class for simple, character-oriented tokenizers. + + + Returns true iff a character should be included in a token. This + tokenizer generates as tokens adjacent sequences of characters which + satisfy this predicate. Characters for which this is false are used to + define token boundaries and are not included in tokens. + + + + Called on each token character to normalize it before it is added to the + token. The default implementation does nothing. Subclasses may use this + to, e.g., lowercase tokens. + + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Construct a new LetterTokenizer. + + + Construct a new LetterTokenizer using a given {@link AttributeSource}. + + + Construct a new LetterTokenizer using a given {@link Lucene.Net.Util.AttributeSource.AttributeFactory}. + + + Collects only characters which satisfy + {@link Character#isLetter(char)}. + + + + Construct a new LowerCaseTokenizer. + + + Construct a new LowerCaseTokenizer using a given {@link AttributeSource}. + + + Construct a new LowerCaseTokenizer using a given {@link Lucene.Net.Util.AttributeSource.AttributeFactory}. + + + Converts char to lower case + {@link Character#toLowerCase(char)}. + + + + An iterator to iterate over set bits in an OpenBitSet. + This is faster than nextSetBit() for iterating over the complete set of bits, + especially when the density of the bits set is high. + + + $Id$ + + + + ** the python code that generated bitlist + def bits2int(val): + arr=0 + for shift in range(8,0,-1): + if val & 0x80: + arr = (arr << 4) | shift + val = val << 1 + return arr + def int_table(): + tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] + return ','.join(tbl) + **** + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + use {@link #DocID()} instead. + + + + Implements the wildcard search query. Supported wildcards are *, which + matches any character sequence (including the empty one), and ?, + which matches any single character. Note this query can be slow, as it + needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, + a Wildcard term should not start with one of the wildcards * or + ?. + +

This query uses the {@link + MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + rewrite method. + +

+ + +
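+ A short example (field name and pattern are illustrative); note that the pattern does
+ not begin with a wildcard.
+
+            using Lucene.Net.Index;
+            using Lucene.Net.Search;
+
+            // Inside some method: matches "test", "text", "tested", "texture", ...
+            Query query = new WildcardQuery(new Term("body", "te?t*"));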
+ + Returns the pattern term. + + + Prints a user-readable version of this query. + + + Score a candidate doc for all slop-valid position-combinations (matches) + encountered while traversing/hopping the PhrasePositions. +
The score contribution of a match depends on the distance: +
- highest score for distance=0 (exact match). +
- score gets lower as distance gets higher. +
Example: for query "a b"~2, a document "x a b a y" can be scored twice: + once for "a b" (distance=0), and once for "b a" (distance=2). +
Possibly not all valid combinations are encountered, because for efficiency + we always propagate the least PhrasePosition. This allows the implementation to rely on a + PriorityQueue and move forward faster. + As a result, for example, document "a b c b a" + would score differently for queries "a b c"~4 and "c b a"~4, although + they really are equivalent. + Similarly, for doc "a b c b a f g", query "c b"~2 + would get the same score as "g f"~2, although "c b"~2 could be matched twice. + We may want to fix this in the future (currently we do not, for performance reasons). +
+
+ + Init PhrasePositions in place. + There is a one time initialization for this scorer: +
- Put in repeats[] each pp that has another pp with same position in the doc. +
- Also mark each such pp by pp.repeats = true. +
Later, termPositionsDiffer(pp) can consult repeats[], making that check efficient. + In particular, this allows queries with no repetitions to be scored with no overhead from this computation. +
- Example 1 - query with no repetitions: "ho my"~2 +
- Example 2 - query with repetitions: "ho my my"~2 +
- Example 3 - query with repetitions: "my ho my"~2 +
Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection. +
+ end (max position), or -1 if any term ran out (i.e. done) + + IOException +
+ + We disallow two pp's from having the same TermPosition, thereby verifying that multiple occurrences + of the same word in the query map to different positions in the matched doc. + + null if the positions differ (i.e. valid); otherwise returns the higher-offset PhrasePositions + of the first two PPs found not to differ. + + + + A Scorer for queries with a required part and an optional part. + Delays skipTo() on the optional part until a score() is needed. +
+ This Scorer implements {@link Scorer#SkipTo(int)}. +
+
+ + The scorers passed from the constructor. + These are set to null as soon as their next() or skipTo() returns false. + + + + Construct a ReqOptScorer. + The required scorer. This must match. + + The optional scorer. This is used for scoring only. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + use {@link #DocID()} instead. + + + + Returns the score of the current document matching the query. + Initially invalid, until {@link #Next()} is called the first time. + + The score of the required scorer, eventually increased by the score + of the optional scorer when it also matches the current document. + + + + Explain the score of a document. + TODO: Also show the total score. + See BooleanScorer.explain() on how to do this. + + + + Expert: Maintains caches of term values. + +

Created: May 19, 2004 11:13:14 AM + +

+ lucene 1.4 + + $Id: FieldCache.java 807841 2009-08-25 22:27:31Z markrmiller $ + + + +
+ + Expert: Stores term text values and document ordering data. + + + All the term values, in natural order. + + + For each document, an index into the lookup array. + + + Creates one of these objects + + + Indicator for StringIndex values in the cache. + + + Expert: The cache used internally by sorting and range query classes. + + + The default parser for byte values, which are encoded by {@link Byte#toString(byte)} + + + The default parser for short values, which are encoded by {@link Short#toString(short)} + + + The default parser for int values, which are encoded by {@link Integer#toString(int)} + + + The default parser for float values, which are encoded by {@link Float#toString(float)} + + + The default parser for long values, which are encoded by {@link Long#toString(long)} + + + The default parser for double values, which are encoded by {@link Double#toString(double)} + + + A parser instance for int values encoded by {@link NumericUtils#IntToPrefixCoded(int)}, e.g. when indexed + via {@link NumericField}/{@link NumericTokenStream}. + + + + A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed + via {@link NumericField}/{@link NumericTokenStream}. + + + + A parser instance for long values encoded by {@link NumericUtils#LongToPrefixCoded(long)}, e.g. when indexed + via {@link NumericField}/{@link NumericTokenStream}. + + + + A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed + via {@link NumericField}/{@link NumericTokenStream}. + + + + Interface to parse bytes from document fields. + + + + + Return a single Byte representation of this field's value. + + + Interface to parse shorts from document fields. + + + + + Return a short representation of this field's value. + + + Interface to parse ints from document fields. + + + + + Return an integer representation of this field's value. + + + Interface to parse floats from document fields. + + + + + Return an float representation of this field's value. + + + The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link Lucene.Net.Index.TermPositionVector}'s + offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the + original content). + + + + Convenience declaration when creating a {@link Lucene.Net.Index.TermPositionVector} that stores only position information. + + + The accessor for the ending offset for the term + The offset + + + + The accessor for the starting offset of the term. + + + The offset + + + + Two TermVectorOffsetInfos are equals if both the start and end offsets are the same + The comparison Object + + true if both {@link #GetStartOffset()} and {@link #GetEndOffset()} are the same for both objects. + + + + This is a DocFieldConsumer that writes stored fields. + + + Fills in any hole in the docIDs + + + + Base class for enumerating all but deleted docs. + +

NOTE: this class is meant only to be used internally + by Lucene; it's only public so it can be shared across + packages. This means the API is freely subject to + change, and the class could be removed entirely, in any + Lucene release. Use directly at your own risk! +

+
+ + A {@link FieldSelector} based on a Map of field names to {@link FieldSelectorResult}s + + + + + Create a MapFieldSelector + maps from field names (String) to {@link FieldSelectorResult}s + + + + Create a MapFieldSelector + fields to LOAD. List of Strings. All other fields are NO_LOAD. + + + + Create a MapFieldSelector + fields to LOAD. All other fields are NO_LOAD. + + + + Load field according to its associated value in fieldSelections + a field name + + the fieldSelections value that field maps to or NO_LOAD if none. + + + + Load the first field and break.

+ See {@link FieldSelectorResult#LOAD_AND_BREAK} +

+
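+ A hedged sketch of selective field loading. The field names and the reader and docId
+ variables are assumptions for the example, and the exact MapFieldSelector constructor
+ overloads vary slightly between Lucene.Net versions.
+
+            using Lucene.Net.Documents;
+            using Lucene.Net.Index;
+
+            // Inside some method, given an open IndexReader named reader and a document number docId:
+            FieldSelector selector = new MapFieldSelector(new string[] { "title", "id" });
+            Document doc = reader.Document(docId, selector); // only "title" and "id" are loaded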
+ + Provides support for converting dates to strings and vice-versa. + The strings are structured so that lexicographic sorting orders by date, + which makes them suitable for use as field values and search terms. + +

Note that this class saves dates with millisecond granularity, + which is bad for {@link TermRangeQuery} and {@link PrefixQuery}, as those + queries are expanded to a BooleanQuery with a potentially large number + of terms when searching. Thus you might want to use + {@link DateTools} instead. + +

+ Note: dates before 1970 cannot be used, and therefore cannot be + indexed when using this class. See {@link DateTools} for an + alternative without such a limitation. + +

+ Another approach is {@link NumericUtils}, which provides + a sortable binary representation (prefix encoded) of numeric values, which + date/time are. + For indexing a {@link Date} or {@link Calendar}, just get the unix timestamp as + long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and + index this as a numeric value with {@link NumericField} + and use {@link NumericRangeQuery} to query it. + +

+ If you build a new index, use {@link DateTools} or + {@link NumericField} instead. + This class is included for use with existing + indices and will be removed in a future release. + +
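+ For new indexes, the recommended DateTools alternative looks roughly like this
+ (day resolution is just an example):
+
+            using Lucene.Net.Documents;
+
+            // Inside some method: the resulting string sorts lexicographically by date.
+            string indexed = DateTools.DateToString(System.DateTime.UtcNow, DateTools.Resolution.DAY);
+
+            // Convert back when reading the stored value.
+            System.DateTime roundTripped = DateTools.StringToDate(indexed);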
+ + Converts a Date to a string suitable for indexing. + RuntimeException if the date specified in the + method argument is before 1970 + + + + Converts a millisecond time to a string suitable for indexing. + RuntimeException if the time specified in the + method argument is negative, that is, before 1970 + + + + Converts a string-encoded date into a millisecond time. + + + Converts a string-encoded date into a Date object. + + + This class is a scanner generated by + JFlex 1.4.1 + on 9/4/08 6:49 PM from the specification file + /tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex + + + + This character denotes the end of file + + + initial size of the lookahead buffer + + + lexical states + + + Translates characters to character classes + + + Translates characters to character classes + + + Translates DFA states to action switch labels. + + + Translates a state to a row index in the transition table + + + The transition table of the DFA + + + ZZ_ATTRIBUTE[aState] contains the attributes of state aState + + + the input device + + + the current state of the DFA + + + the current lexical state + + + this buffer contains the current text to be matched and is + the source of the yytext() string + + + + the textposition at the last accepting state + + + the textposition at the last state to be included in yytext + + + the current text position in the buffer + + + startRead marks the beginning of the yytext() string in the buffer + + + endRead marks the last character in the buffer, that has been read + from input + + + + number of newlines encountered up to the start of the matched text + + + the number of characters up to the start of the matched text + + + the number of characters from the last newline up to the start of the + matched text + + + + zzAtBOL == true <=> the scanner is currently at the beginning of a line + + + zzAtEOF == true <=> the scanner is at the EOF + + + this solves a bug where HOSTs that end with '.' are identified + as ACRONYMs. It is deprecated and will be removed in the next + release. + + + + Fills Lucene token with the current token text. + + + Fills TermAttribute with the current token text. + + + Creates a new scanner + There is also a java.io.InputStream version of this constructor. + + + the java.io.Reader to read input from. + + + + Creates a new scanner. + There is also java.io.Reader version of this constructor. + + + the java.io.Inputstream to read input from. + + + + Unpacks the compressed character translation table. + + + the packed character translation table + + the unpacked character translation table + + + + Refills the input buffer. + + + false, iff there was new input. + + + if any I/O-Error occurs + + + + Closes the input stream. + + + Resets the scanner to read from a new input stream. + Does not close the old reader. + + All internal variables are reset, the old input stream + cannot be reused (internal buffer is discarded and lost). + Lexical state is set to ZZ_INITIAL. + + + the new input stream + + + + Returns the current lexical state. + + + Enters a new lexical state + + + the new lexical state + + + + Returns the text matched by the current regular expression. + + + Returns the character at position pos from the + matched text. + + It is equivalent to yytext().charAt(pos), but faster + + + the position of the character to fetch. + A value from 0 to yylength()-1. + + + the character at position pos + + + + Returns the length of the matched text region. 
+ + + Reports an error that occurred while scanning. + + In a well-formed scanner (no or only correct usage of + yypushback(int) and a match-all fallback rule) this method + will only be called with things that "Can't Possibly Happen". + If this method is called, something is seriously wrong + (e.g. a JFlex bug producing a faulty scanner etc.). + + Usual syntax/scanner level error handling should be done + in error fallback rules. + + + the code of the error message to display + + + + Pushes the specified number of characters back into the input stream. + + They will be read again by the next call of the scanning method + + + the number of characters to be read again. + This number must not be greater than yylength()! + + + + Resumes scanning until the next regular expression is matched, + the end of input is encountered or an I/O-Error occurs. + + + the next token + + if any I/O-Error occurs + + + + A grammar-based tokenizer constructed with JFlex +

This should be a good tokenizer for most European-language documents: + +

+   • Splits words at punctuation characters, removing punctuation. However, a
+     dot that's not followed by whitespace is considered part of a token.
+   • Splits words at hyphens, unless there's a number in the token, in which case
+     the whole token is interpreted as a product number and is not split.
+   • Recognizes email addresses and internet hostnames as one token.
+

Many applications have specific tokenizer needs. If this tokenizer does + not suit your application, please consider copying this source code + directory to your project and maintaining your own grammar-based tokenizer. + + +

+ You must specify the required {@link Version} compatibility when creating + StandardAnalyzer: +

+
+
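+ A hedged usage sketch. The input text is illustrative, and the attribute-retrieval
+ syntax follows the 2.9-era TokenStream API, which may differ slightly between
+ Lucene.Net releases.
+
+            using System.IO;
+            using Lucene.Net.Analysis.Standard;
+            using Lucene.Net.Analysis.Tokenattributes;
+            using Lucene.Net.Util;
+
+            // Inside some method:
+            TextReader reader = new StringReader("Visit lucene.apache.org or mail mike@example.com");
+            StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_29, reader);
+            TermAttribute termAtt = (TermAttribute) tokenizer.AddAttribute(typeof(TermAttribute));
+
+            while (tokenizer.IncrementToken())
+            {
+                System.Console.WriteLine(termAtt.Term());
+            }
+            tokenizer.Close();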
+ + this solves a bug where HOSTs that end with '.' are identified + as ACRONYMs. It is deprecated and will be removed in the next + release. + + + + A private instance of the JFlex-constructed scanner + + + String token types that correspond to token type int constants + + + Please use {@link #TOKEN_TYPES} instead + + + + Specifies whether deprecated acronyms should be replaced with HOST type. + This is false by default to support backward compatibility. +

+ See http://issues.apache.org/jira/browse/LUCENE-1068 + +

+ this should be removed in the next release (3.0). + +
+ + Set the max allowed token length. Any token longer + than this is skipped. + + + + + + + + Creates a new instance of the {@link StandardTokenizer}. Attaches the + input to a newly created JFlex scanner. + + Use {@link #StandardTokenizer(Version, Reader)} instead + + + + Creates a new instance of the {@link Lucene.Net.Analysis.Standard.StandardTokenizer}. Attaches + the input to the newly created JFlex scanner. + + + The input reader + + Set to true to replace mischaracterized acronyms with HOST. + + See http://issues.apache.org/jira/browse/LUCENE-1068 + + Use {@link #StandardTokenizer(Version, Reader)} instead + + + + Creates a new instance of the + {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches + the input to the newly created JFlex scanner. + + + The input reader + + See http://issues.apache.org/jira/browse/LUCENE-1068 + + + + Creates a new StandardTokenizer with a given {@link AttributeSource}. + Use + {@link #StandardTokenizer(Version, AttributeSource, Reader)} + instead + + + + Creates a new StandardTokenizer with a given {@link AttributeSource}. + + + Creates a new StandardTokenizer with a given {@link Lucene.Net.Util.AttributeSource.AttributeFactory} + Use + {@link #StandardTokenizer(Version, org.apache.lucene.util.AttributeSource.AttributeFactory, Reader)} + instead + + + + Creates a new StandardTokenizer with a given + {@link org.apache.lucene.util.AttributeSource.AttributeFactory} + + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Prior to https://issues.apache.org/jira/browse/LUCENE-1068, StandardTokenizer mischaracterized as acronyms tokens like www.abc.com + when they should have been labeled as hosts instead. + + true if StandardTokenizer now returns these tokens as Hosts, otherwise false + + + Remove in 3.X and make true the only valid value + + + + + Set to true to replace mischaracterized acronyms as HOST. + + Remove in 3.X and make true the only valid value + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + + Holds a map of String input to String output, to be used + with {@link MappingCharFilter}. + + + + Records a replacement to be applied to the inputs + stream. Whenever singleMatch occurs in + the input, it will be replaced with + replacement. + + + input String to be replaced + + output String + + + + "Tokenizes" the entire stream as a single token. This is useful + for data like zip codes, ids, and some product names. + + + + Provides methods for sanity checking that entries in the FieldCache + are not wasteful or inconsistent. +

+

+ Lucene 2.9 Introduced numerous enhancements into how the FieldCache + is used by the low levels of Lucene searching (for Sorting and + ValueSourceQueries) to improve both the speed for Sorting, as well + as reopening of IndexReaders. But these changes have shifted the + usage of FieldCache from "top level" IndexReaders (frequently a + MultiReader or DirectoryReader) down to the leaf level SegmentReaders. + As a result, existing applications that directly access the FieldCache + may find RAM usage increase significantly when upgrading to 2.9 or + Later. This class provides an API for these applications (or their + Unit tests) to check at run time if the FieldCache contains "insane" + usages of the FieldCache. +

+

+ EXPERIMENTAL API: This API is considered extremely advanced and + experimental. It may be removed or altered w/o warning in future releases + of Lucene. +

+

+ + + + + + +
+ + If set, will be used to estimate size for all CacheEntry objects + dealt with. + + + + Quick and dirty convenience method + + + + + Quick and dirty convenience method that instantiates an instance with + "good defaults" and uses it to test the CacheEntry[] + + + + + + Tests a CacheEntry[] for indication of "insane" cache usage. +

+ NOTE: FieldCache CreationPlaceholder objects are ignored. + (TODO: is this a bad idea? are we masking a real problem?) +

+

+
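+ A hedged sketch of a run-time (or unit-test) check. The reference to the shared
+ FieldCache singleton as Lucene.Net.Search.FieldCache_Fields.DEFAULT is an assumption
+ about the port's naming (the Java original exposes it as FieldCache.DEFAULT).
+
+            using Lucene.Net.Util;
+
+            // Inside some method: inspect everything currently held by the shared FieldCache.
+            FieldCacheSanityChecker.Insanity[] problems =
+                FieldCacheSanityChecker.CheckSanity(Lucene.Net.Search.FieldCache_Fields.DEFAULT);
+
+            foreach (FieldCacheSanityChecker.Insanity insanity in problems)
+            {
+                System.Console.WriteLine(insanity.ToString());
+            }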
+ + Internal helper method used by check that iterates over + valMismatchKeys and generates a Collection of Insanity + instances accordingly. The MapOfSets are used to populate + the Insanity objects. + + + + + + Internal helper method used by check that iterates over + the keys of readerFieldToValIds and generates a Collection + of Insanity instances whenever two (or more) ReaderField instances are + found that have an ancestry relationship. + + + + + + + Checks if the seed is an IndexReader, and if so will walk + the hierarchy of subReaders building up a list of the objects + returned by obj.getFieldCacheKey() + + + + Simple pair object for using "readerKey + fieldName" as a Map key + + + Simple container for a collection of related CacheEntry objects that + in conjunction with each other represent some "insane" usage of the + FieldCache. + + + + Type of insane behavior this object represents + + + Description of the insane behavior + + + CacheEntry objects which suggest a problem + + + Multi-line representation of this Insanity object, starting with + the Type and Msg, followed by each CacheEntry.toString() on its + own line prefaced by a tab character + + + + An Enumeration of the different types of "insane" behavior that + may be detected in a FieldCache. + + + + + + + + + + + Indicates an overlap in cache usage on a given field + in sub/super readers. + +

+ Indicates entries have the same reader+fieldname but + different cached values. This can happen if different datatypes + or parsers are used -- and while it's not necessarily a bug, + it's typically an indication of a possible problem. +

+

+ NOTE: Only the reader, fieldname, and cached value are actually + tested -- if two cache entries have different parsers or datatypes but + the cached values are the same Object (== not just equal()) this method + does not consider that a red flag. This allows for subtle variations + in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) +

+

+
+ + Indicates an expected bit of "insanity". This may be useful for + clients that wish to preserve/log information about insane usage + but indicate that it was expected. + + + + A {@link Scorer} which wraps another scorer and caches the score of the + current document. Successive calls to {@link #Score()} will return the same + result and will not invoke the wrapped Scorer's score() method, unless the + current document has changed.
+ This class might be useful due to the changes done to the {@link Collector} + interface, in which the score is not computed for a document by default, only + if the collector requests it. Some collectors may need to use the score in + several places, however all they have in hand is a {@link Scorer} object, and + might end up computing the score of a document more than once. +
+
+ + Creates a new instance by wrapping the given scorer. + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + Calculate the final score as the average score of all payloads seen. +

+ Is thread safe and completely reusable. + + +

+
+ + The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the the subquery scorers + is generated in document number order. The score for each document is the maximum of the scores computed + by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores + for the other subqueries that generate the document. + + + + Creates a new instance of DisjunctionMaxScorer + + + Multiplier applied to non-maximum-scoring subqueries for a + document as they are summed into the result. + + -- not used since our definition involves neither coord nor terms + directly + + The sub scorers this Scorer should iterate on + + The actual number of scorers to iterate on. Note that the array's + length may be larger than the actual number of scorers. + + + + Generate the next document matching our associated DisjunctionMaxQuery. + + + true iff there is a next document + + use {@link #NextDoc()} instead. + + + + use {@link #DocID()} instead. + + + + Determine the current document score. Initially invalid, until {@link #Next()} is called the first time. + the score of the current generated document + + + + Advance to the first document beyond the current whose number is greater + than or equal to target. + + + the minimum number of the next desired document + + true iff there is a document to be generated whose number is at + least target + + use {@link #Advance(int)} instead. + + + + Explain a score that we computed. UNSUPPORTED -- see explanation capability in DisjunctionMaxQuery. + the number of a document we scored + + the Explanation for our score + + + + This abstract class writes skip lists with multiple levels. + + Example for skipInterval = 3: + c (skip level 2) + c c c (skip level 1) + x x x x x x x x x x (skip level 0) + d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) + 3 6 9 12 15 18 21 24 27 30 (df) + + d - document + x - skip data + c - skip data with child pointer + + Skip level i contains every skipInterval-th entry from skip level i-1. + Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). + + Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. + This guarantess a logarithmic amount of skips to find the target document. + + While this class takes care of writing the different skip levels, + subclasses must define the actual format of the skip data. + + + + + Subclasses must implement the actual skip data encoding in this method. + + + the level skip data shall be writting for + + the skip buffer to write to + + + + Writes the current skip data to the buffers. The current document frequency determines + the max level is skip data is to be written to. + + + the current document frequency + + IOException + + + Writes the buffered skip lists to the given output. + + + the IndexOutput the skip lists shall be written to + + the pointer the skip list starts + + + + This abstract class reads skip lists with multiple levels. + + See {@link MultiLevelSkipListWriter} for the information about the encoding + of the multi level skip lists. + + Subclasses must implement the abstract method {@link #ReadSkipData(int, IndexInput)} + which defines the actual format of the skip data. + + + + Returns the id of the doc to which the last call of {@link #SkipTo(int)} + has skipped. + + + + Skips entries to the first beyond the current whose document number is + greater than or equal to target. Returns the current doc count. 
+ + + + Seeks the skip entry on the given level + + + initializes the reader + + + Loads the skip levels + + + Subclasses must implement the actual skip data encoding in this method. + + + the level skip data shall be read from + + the skip stream to read from + + + + Copies the values of the last read skip entry on this level + + + used to buffer the top skip levels + + + An IndexWriter creates and maintains an index. +

The create argument to the {@link + #IndexWriter(Directory, Analyzer, boolean) constructor} determines + whether a new index is created, or whether an existing index is + opened. Note that you can open an index with create=true + even while readers are using the index. The old readers will + continue to search the "point in time" snapshot they had opened, + and won't see the newly created index until they re-open. There are + also {@link #IndexWriter(Directory, Analyzer) constructors} + with no create argument which will create a new index + if there is not already an index at the provided path and otherwise + open the existing index.

+

In either case, documents are added with {@link #AddDocument(Document) + addDocument} and removed with {@link #DeleteDocuments(Term)} or {@link + #DeleteDocuments(Query)}. A document can be updated with {@link + #UpdateDocument(Term, Document) updateDocument} (which just deletes + and then adds the entire document). When finished adding, deleting + and updating documents, {@link #Close() close} should be called.

+ +

These changes are buffered in memory and periodically + flushed to the {@link Directory} (during the above method + calls). A flush is triggered when there are enough + buffered deletes (see {@link #setMaxBufferedDeleteTerms}) + or enough added documents since the last flush, whichever + is sooner. For the added documents, flushing is triggered + either by RAM usage of the documents (see {@link + #setRAMBufferSizeMB}) or the number of added documents. + The default is to flush when RAM usage hits 16 MB. For + best indexing speed you should flush by RAM usage with a + large RAM buffer. Note that flushing just moves the + internal buffered state in IndexWriter into the index, but + these changes are not visible to IndexReader until either + {@link #Commit()} or {@link #close} is called. A flush may + also trigger one or more segment merges which by default + run with a background thread so as not to block the + addDocument calls (see below + for changing the {@link MergeScheduler}).
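+
+ A short sketch of tuning those flush triggers (values are arbitrary; writer is an
+ IndexWriter as in the earlier sketch):
+
+            writer.SetRAMBufferSizeMB(64.0);                           // flush once ~64 MB is buffered
+            writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); // don't also flush by doc count
+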

+ +

The optional autoCommit argument to the {@link + #IndexWriter(Directory, boolean, Analyzer) constructors} + controls visibility of the changes to {@link IndexReader} + instances reading the same index. When this is + false, changes are not visible until {@link + #Close()} or {@link #Commit()} is called. Note that changes will still be + flushed to the {@link Directory} as new files, but are + not committed (no new segments_N file is written + referencing the new files, nor are the files sync'd to stable storage) + until {@link #Close()} or {@link #Commit()} is called. If something + goes terribly wrong (for example the JVM crashes), then + the index will reflect none of the changes made since the + last commit, or the starting state if commit was not called. + You can also call {@link #Rollback()}, which closes the writer + without committing any changes, and removes any index + files that had been flushed but are now unreferenced. + This mode is useful for preventing readers from refreshing + at a bad time (for example after you've done all your + deletes but before you've done your adds). It can also be + used to implement simple single-writer transactional + semantics ("all or none"). You can do a two-phase commit + by calling {@link #PrepareCommit()} + followed by {@link #Commit()}. This is necessary when + Lucene is working with an external resource (for example, + a database) and both must either commit or rollback the + transaction.
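+
+ A sketch of the "all or none" pattern described above (pendingDocs is a
+ hypothetical collection; writer is an IndexWriter as in the earlier sketch):
+
+            try
+            {
+                foreach (Document pending in pendingDocs)
+                    writer.AddDocument(pending);
+                writer.Commit();        // the whole batch becomes visible at once
+            }
+            catch (System.Exception)
+            {
+                writer.Rollback();      // closes the writer and discards uncommitted changes
+                throw;
+            }
+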

+

When autoCommit is true then + the writer will periodically commit on its own. [Deprecated: Note that in 3.0, IndexWriter will + no longer accept autoCommit=true (it will be hardwired to + false). You can always call {@link #Commit()} yourself + when needed]. There is + no guarantee when exactly an auto commit will occur (it + used to be after every flush, but it is now after every + completed merge, as of 2.4). If you want to force a + commit, call {@link #Commit()}, or, close the writer. Once + a commit has finished, newly opened {@link IndexReader} instances will + see the changes to the index as of that commit. When + running in this mode, be careful not to refresh your + readers while optimize or segment merges are taking place + as this can tie up substantial disk space.

+

+

Regardless of autoCommit, an {@link + IndexReader} or {@link Lucene.Net.Search.IndexSearcher} will only see the + index as of the "point in time" that it was opened. Any + changes committed to the index after the reader was opened + are not visible until the reader is re-opened.

+

If an index will not have more documents added for a while and optimal search + performance is desired, then either the full {@link #Optimize() optimize} + method or partial {@link #Optimize(int)} method should be + called before the index is closed.

+

Opening an IndexWriter creates a lock file for the directory in use. Trying to open + another IndexWriter on the same directory will lead to a + {@link LockObtainFailedException}. The {@link LockObtainFailedException} + is also thrown if an IndexReader on the same directory is used to delete documents + from the index.

+

+ +

Expert: IndexWriter allows an optional + {@link IndexDeletionPolicy} implementation to be + specified. You can use this to control when prior commits + are deleted from the index. The default policy is {@link + KeepOnlyLastCommitDeletionPolicy} which removes all prior + commits as soon as a new commit is done (this matches + behavior before 2.2). Creating your own policy can allow + you to explicitly keep previous "point in time" commits + alive in the index for some time, to allow readers to + refresh to the new commit without having the old commit + deleted out from under them. This is necessary on + filesystems like NFS that do not support "delete on last + close" semantics, which Lucene's "point in time" search + normally relies on.
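+
+ A sketch of passing a deletion policy explicitly, using one of the expert
+ constructors documented below (KeepOnlyLastCommitDeletionPolicy is the default;
+ a custom IndexDeletionPolicy implementation could be substituted to keep older
+ commits alive):
+
+            IndexDeletionPolicy policy = new KeepOnlyLastCommitDeletionPolicy();
+            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), policy,
+                                                 IndexWriter.MaxFieldLength.UNLIMITED);
+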

+

Expert:
+ IndexWriter allows you to separately change
+ the {@link MergePolicy} and the {@link MergeScheduler}.
+ The {@link MergePolicy} is invoked whenever there are
+ changes to the segments in the index. Its role is to
+ select which merges to do, if any, and return a {@link
+ MergePolicy.MergeSpecification} describing the merges. It
+ also selects merges to do for optimize(). (The default is
+ {@link LogByteSizeMergePolicy}.) Then, the {@link
+ MergeScheduler} is invoked with the requested merges and
+ it decides when and how to run the merges. The default is
+ {@link ConcurrentMergeScheduler}.
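+
+ A sketch of swapping the merge scheduler (SerialMergeScheduler runs merges on the
+ calling thread; whether that is desirable depends entirely on your indexing setup,
+ and the class name assumes the standard Lucene 2.x scheduler is present in the port):
+
+            writer.SetMergeScheduler(new SerialMergeScheduler());
+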

+

NOTE: if you hit an + OutOfMemoryError then IndexWriter will quietly record this + fact and block all future segment commits. This is a + defensive measure in case any internal state (buffered + documents and deletions) were corrupted. Any subsequent + calls to {@link #Commit()} will throw an + IllegalStateException. The only course of action is to + call {@link #Close()}, which internally will call {@link + #Rollback()}, to undo any changes to the index since the + last commit. If you opened the writer with autoCommit + false you can also just call {@link #Rollback()} + directly.
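+
+ A sketch of the recommended reaction to an out-of-memory condition (surfacing as
+ System.OutOfMemoryException in the .NET port; hugeDoc is a hypothetical Document):
+
+            try
+            {
+                writer.AddDocument(hugeDoc);
+            }
+            catch (System.OutOfMemoryException)
+            {
+                writer.Close();     // internally rolls back to the last commit
+                throw;
+            }
+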

+

NOTE: {@link + IndexWriter} instances are completely thread + safe, meaning multiple threads can call any of its + methods, concurrently. If your application requires + external synchronization, you should not + synchronize on the IndexWriter instance as + this may cause deadlock; use your own (non-Lucene) objects + instead.

+

+
+ + Name of the write lock in the index. + + + Value to denote a flush trigger is disabled + + + Default value is 16 MB (which means flush when buffered + docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}. + + + + Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}. + + + Default value is 128. Change using {@link #SetTermIndexInterval(int)}. + + + Default value for the write lock timeout (1,000). + + + + + + + + + + + Disabled by default (because IndexWriter flushes by RAM usage + by default). Change using {@link #SetMaxBufferedDocs(int)}. + + + + Disabled by default (because IndexWriter flushes by RAM usage + by default). Change using {@link #SetMaxBufferedDeleteTerms(int)}. + + + + + + + + + + Absolute hard maximum length for a term. If a term + arrives from the analyzer longer than this length, it + is skipped and a message is printed to infoStream, if + set (see {@link #setInfoStream}). + + + + Default for {@link #getMaxSyncPauseSeconds}. On + Windows this defaults to 10.0 seconds; elsewhere it's + 0. + + + + Expert: returns a readonly reader, covering all committed as well as + un-committed changes to the index. This provides "near real-time" + searching, in that changes made during an IndexWriter session can be + quickly made available for searching without closing the writer nor + calling {@link #commit}. + +

+ Note that this is functionally equivalent to calling {@link #Commit} and then
+ using {@link IndexReader#open} to open a new reader. But the turnaround
+ time of this method should be faster since it avoids the potentially
+ costly {@link #Commit}.
+

+ + You must close the {@link IndexReader} returned by this method once you are done using it. + +

+ It's near real-time because there is no hard
+ guarantee on how quickly you can get a new reader after
+ making changes with IndexWriter. You'll have to
+ experiment in your situation to determine if it's
+ fast enough. As this is a new and experimental
+ feature, please report back on your findings so we can
+ learn, improve and iterate.

+ +

The resulting reader supports {@link
+ IndexReader#reopen}, but that call will simply forward
+ back to this method (though this may change in the
+ future).

+ +

The very first time this method is called, this + writer instance will make every effort to pool the + readers that it opens for doing merges, applying + deletes, etc. This means additional resources (RAM, + file descriptors, CPU time) will be consumed.

+ +

For lower latency on reopening a reader, you should call
+ {@link #setMergedSegmentWarmer} to
+ pre-warm a newly merged segment before it's committed
+ to the index. This is important for minimizing index-to-search
+ delay after a large merge.
+

If an addIndexes* call is running in another thread,
+ then this reader will only search those segments from
+ the foreign index that have been successfully copied
+ over, so far.
+
+

NOTE: Once the writer is closed, any + outstanding readers may continue to be used. However, + if you attempt to reopen any of those readers, you'll + hit an {@link AlreadyClosedException}.

+ +

NOTE: This API is experimental and might + change in incompatible ways in the next release.

+ +

+ IndexReader that covers entire index plus all + changes made so far by this IndexWriter instance + + + IOException +
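+
+ A sketch of the near real-time pattern described above (assuming the Lucene.Net
+ method names GetReader on IndexWriter and Close on IndexReader/IndexSearcher):
+
+            writer.AddDocument(doc);
+            IndexReader reader = writer.GetReader();        // sees uncommitted changes
+            IndexSearcher searcher = new IndexSearcher(reader);
+            // ... run queries ...
+            searcher.Close();
+            reader.Close();                                 // the caller must close it
+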
+
+ Expert: like {@link #getReader}, except you can
+ specify which termInfosIndexDivisor should be used for
+ any newly opened readers.
+
+ Subsamples which indexed
+ terms are loaded into RAM. This has the same effect as {@link
+ IndexWriter#setTermIndexInterval} except that setting
+ must be done at indexing time while this setting can be
+ set per reader. When set to N, then one in every
+ N*termIndexInterval terms in the index is loaded into
+ memory. By setting this to a value > 1 you can reduce
+ memory usage, at the expense of higher latency when
+ loading a TermInfo. The default value is 1. Set this
+ to -1 to skip loading the terms index entirely.
+
+
+
+ Obtain the number of deleted docs for a pooled reader.
+ If the reader isn't being pooled, the segmentInfo's
+ delCount is returned.
+
+
+
+ Used internally to throw an {@link
+ AlreadyClosedException} if this IndexWriter has been
+ closed.
+
+ AlreadyClosedException if this IndexWriter is closed
+
+
+ Prints a message to the infoStream (if non-null),
+ prefixed with the identifying information for this
+ writer and the thread that's calling it.
+
+
+
+ Casts current mergePolicy to LogMergePolicy, and throws
+ an exception if the mergePolicy is not a LogMergePolicy.
+
+
+

Get the current setting of whether newly flushed + segments will use the compound file format. Note that + this just returns the value previously set with + setUseCompoundFile(boolean), or the default value + (true). You cannot use this to query the status of + previously flushed segments.

+ +

Note that this method is a convenience method: it + just calls mergePolicy.getUseCompoundFile as long as + mergePolicy is an instance of {@link LogMergePolicy}. + Otherwise an IllegalArgumentException is thrown.

+ +

+ + +
+ +

Setting to turn on usage of a compound file. When on, + multiple files for each segment are merged into a + single file when a new segment is flushed.

+ +

Note that this method is a convenience method: it + just calls mergePolicy.setUseCompoundFile as long as + mergePolicy is an instance of {@link LogMergePolicy}. + Otherwise an IllegalArgumentException is thrown.

+

+
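+
+ A sketch of toggling the compound-file setting (only valid while the merge policy
+ is a LogMergePolicy, as noted above):
+
+            writer.SetUseCompoundFile(false);       // keep per-segment files separate
+            bool cfs = writer.GetUseCompoundFile();
+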
+ + Expert: Set the Similarity implementation used by this IndexWriter. + + + + + + + Expert: Return the Similarity implementation used by this IndexWriter. + +

This defaults to the current value of {@link Similarity#GetDefault()}. +

+
+ + Expert: Set the interval between indexed terms. Large values cause less + memory to be used by IndexReader, but slow random-access to terms. Small + values cause more memory to be used by an IndexReader, and speed + random-access to terms. + + This parameter determines the amount of computation required per query + term, regardless of the number of documents that contain that term. In + particular, it is the maximum number of other terms that must be + scanned before a term is located and its frequency and position information + may be processed. In a large index with user-entered query terms, query + processing time is likely to be dominated not by term lookup but rather + by the processing of frequency and positional data. In a small index + or when many uncommon query terms are generated (e.g., by wildcard + queries) term lookup may become a dominant cost. + + In particular, numUniqueTerms/interval terms are read into + memory by an IndexReader, and, on average, interval/2 terms + must be scanned for each random term access. + + + + + + + Expert: Return the interval between indexed terms. + + + + + + + Constructs an IndexWriter for the index in path. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + path, replacing the index already there, + if any. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the path to the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + Maximum field length in number of tokens/terms: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + Use {@link #IndexWriter(Directory, Analyzer, + boolean, MaxFieldLength)} + +
+ + Constructs an IndexWriter for the index in path. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + path, replacing the index already there, if any. + + + the path to the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link + #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Constructs an IndexWriter for the index in path. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + path, replacing the index already there, if any. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the path to the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + Use {@link #IndexWriter(Directory, + Analyzer, boolean, MaxFieldLength)} + +
+ + Constructs an IndexWriter for the index in path. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + path, replacing the index already there, if any. + + + the path to the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link + #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Constructs an IndexWriter for the index in d. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + d, replacing the index already there, if any. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + +
+ + Constructs an IndexWriter for the index in d. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + d, replacing the index already there, if any. + + + the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 + release, and call {@link #Commit()} when needed. + Use {@link #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)} instead. + + + + Constructs an IndexWriter for the index in + path, first creating it if it does not + already exist. Text will be analyzed with + a. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the path to the index directory + + the analyzer to use + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + Use {@link #IndexWriter(Directory, Analyzer, MaxFieldLength)} + +
+ + Constructs an IndexWriter for the index in + path, first creating it if it does not + already exist. Text will be analyzed with + a. + + + the path to the index directory + + the analyzer to use + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 + release, and call {@link #Commit()} when needed. + Use {@link #IndexWriter(Directory,Analyzer,MaxFieldLength)} instead. + + + + Constructs an IndexWriter for the index in + path, first creating it if it does not + already exist. Text will be analyzed with + a. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the path to the index directory + + the analyzer to use + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + Use {@link #IndexWriter(Directory, + Analyzer, MaxFieldLength)} + +
+ + Constructs an IndexWriter for the index in + path, first creating it if it does not + already exist. Text will be analyzed with + a. + + + the path to the index directory + + the analyzer to use + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link #IndexWriter(Directory,Analyzer,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Constructs an IndexWriter for the index in + d, first creating it if it does not + already exist. Text will be analyzed with + a. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the index directory + + the analyzer to use + + Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified + via the MaxFieldLength constructor. + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + +
+ + Constructs an IndexWriter for the index in + d, first creating it if it does not + already exist. Text will be analyzed with + a. + + + the index directory + + the analyzer to use + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link + #IndexWriter(Directory,Analyzer,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Constructs an IndexWriter for the index in + d, first creating it if it does not + already exist. Text will be analyzed with + a. + + + the index directory + + see above + + the analyzer to use + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link + #IndexWriter(Directory,Analyzer,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Constructs an IndexWriter for the index in d. + Text will be analyzed with a. If create + is true, then a new, empty index will be created in + d, replacing the index already there, if any. + + + the index directory + + see above + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link + #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Expert: constructs an IndexWriter with a custom {@link + IndexDeletionPolicy}, for the index in d, + first creating it if it does not already exist. Text + will be analyzed with a. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the index directory + + the analyzer to use + + see above + + whether or not to limit field lengths + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + +
+ + Expert: constructs an IndexWriter with a custom {@link + IndexDeletionPolicy}, for the index in d, + first creating it if it does not already exist. Text + will be analyzed with a. + + + the index directory + + see above + + the analyzer to use + + see above + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be + read/written to or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link + #IndexWriter(Directory,Analyzer,IndexDeletionPolicy,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Expert: constructs an IndexWriter with a custom {@link + IndexDeletionPolicy}, for the index in d. + Text will be analyzed with a. If + create is true, then a new, empty index + will be created in d, replacing the index + already there, if any. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + see above + + {@link Lucene.Net.Index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + +
+ + Expert: constructs an IndexWriter with a custom {@link + IndexDeletionPolicy} and {@link IndexingChain}, + for the index in d. + Text will be analyzed with a. If + create is true, then a new, empty index + will be created in d, replacing the index + already there, if any. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the index directory + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + see above + + whether or not to limit field lengths, value is in number of terms/tokens. See {@link Lucene.Net.Index.IndexWriter.MaxFieldLength}. + + the {@link DocConsumer} chain to be used to + process documents + + which commit to open + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + +
+ + Expert: constructs an IndexWriter with a custom {@link + IndexDeletionPolicy}, for the index in d. + Text will be analyzed with a. If + create is true, then a new, empty index + will be created in d, replacing the index + already there, if any. + + + the index directory + + see above + + the analyzer to use + + true to create the index or overwrite + the existing one; false to append to the existing + index + + see above + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + + This constructor will be removed in the 3.0 release. + Use {@link + #IndexWriter(Directory,Analyzer,boolean,IndexDeletionPolicy,MaxFieldLength)} + instead, and call {@link #Commit()} when needed. + + + + Expert: constructs an IndexWriter on specific commit + point, with a custom {@link IndexDeletionPolicy}, for + the index in d. Text will be analyzed + with a. + +

This is only meaningful if you've used a {@link + IndexDeletionPolicy} in that past that keeps more than + just the last commit. + +

This operation is similar to {@link #Rollback()}, + except that method can only rollback what's been done + with the current instance of IndexWriter since its last + commit, whereas this method can rollback to an + arbitrary commit point from the past, assuming the + {@link IndexDeletionPolicy} has preserved past + commits. + +

NOTE: autoCommit (see above) is set to false with this + constructor. + +

+ the index directory + + the analyzer to use + + see above + + whether or not to limit field lengths, value is in number of terms/tokens. See {@link Lucene.Net.Index.IndexWriter.MaxFieldLength}. + + which commit to open + + CorruptIndexException if the index is corrupt + LockObtainFailedException if another writer + has this index open (write.lock could not + be obtained) + + IOException if the directory cannot be read/written to, or + if it does not exist and create is + false or if there is any other low-level + IO error + +
+ + Expert: set the merge policy used by this writer. + + + Expert: returns the current MergePolicy in use by this writer. + + + + + Expert: set the merge scheduler used by this writer. + + + Expert: returns the current MergePolicy in use by this + writer. + + + + + +

Determines the largest segment (measured by + document count) that may be merged with other segments. + Small values (e.g., less than 10,000) are best for + interactive indexing, as this limits the length of + pauses while indexing to a few seconds. Larger values + are best for batched indexing and speedier + searches.

+ +

The default value is {@link Integer#MAX_VALUE}.

+ +

Note that this method is a convenience method: it + just calls mergePolicy.setMaxMergeDocs as long as + mergePolicy is an instance of {@link LogMergePolicy}. + Otherwise an IllegalArgumentException is thrown.

+ +

The default merge policy ({@link + LogByteSizeMergePolicy}) also allows you to set this + limit by net size (in MB) of the segment, using {@link + LogByteSizeMergePolicy#setMaxMergeMB}.

+

+
+ +

Returns the largest segment (measured by document + count) that may be merged with other segments.

+ +

Note that this method is a convenience method: it + just calls mergePolicy.getMaxMergeDocs as long as + mergePolicy is an instance of {@link LogMergePolicy}. + Otherwise an IllegalArgumentException is thrown.

+ +

+ + +
+ + The maximum number of terms that will be indexed for a single field in a + document. This limits the amount of memory required for indexing, so that + collections with very large files will not crash the indexing process by + running out of memory. This setting refers to the number of running terms, + not to the number of different terms.

+ Note: this silently truncates large documents, excluding from the
+ index all terms that occur further in the document. If you know your source
+ documents are large, be sure to set this value high enough to accommodate
+ the expected size. If you set it to Integer.MAX_VALUE, then the only limit
+ is your memory, but you should anticipate an OutOfMemoryError.

+ By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms + will be indexed for a field. +

+
+ + Returns the maximum number of terms that will be + indexed for a single field in a document. + + + + + + Determines the minimal number of documents required + before the buffered in-memory documents are flushed as + a new Segment. Large values generally gives faster + indexing. + +

When this is set, the writer will flush every + maxBufferedDocs added documents. Pass in {@link + #DISABLE_AUTO_FLUSH} to prevent triggering a flush due + to number of buffered documents. Note that if flushing + by RAM usage is also enabled, then the flush will be + triggered by whichever comes first.

+ +

Disabled by default (writer flushes by RAM usage).

+ +

+ IllegalArgumentException if maxBufferedDocs is + enabled but smaller than 2, or it disables maxBufferedDocs + when ramBufferSize is already disabled + + + +
+ + If we are flushing by doc count (not by RAM usage), and + using LogDocMergePolicy then push maxBufferedDocs down + as its minMergeDocs, to keep backwards compatibility. + + + + Returns the number of buffered added documents that will + trigger a flush if enabled. + + + + + + Determines the amount of RAM that may be used for + buffering added documents and deletions before they are + flushed to the Directory. Generally for faster + indexing performance it's best to flush by RAM usage + instead of document count and use as large a RAM buffer + as you can. + +

When this is set, the writer will flush whenever + buffered documents and deletions use this much RAM. + Pass in {@link #DISABLE_AUTO_FLUSH} to prevent + triggering a flush due to RAM usage. Note that if + flushing by document count is also enabled, then the + flush will be triggered by whichever comes first.

+ +

NOTE: the accounting of RAM usage for pending
+ deletions is only approximate. Specifically, if you
+ delete by Query, Lucene currently has no way to measure
+ the RAM usage of individual Queries, so the accounting
+ will under-estimate and you should compensate by either
+ calling commit() periodically yourself, or by using
+ {@link #setMaxBufferedDeleteTerms} to flush by count
+ instead of RAM usage (each buffered delete Query counts
+ as one).
+

+ NOTE: because IndexWriter uses ints when managing its + internal storage, the absolute maximum value for this setting is somewhat + less than 2048 MB. The precise limit depends on various factors, such as + how large your documents are, how many fields have norms, etc., so it's + best to set this value comfortably under 2048. +

+ +

The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.

+ +

+ IllegalArgumentException if ramBufferSize is + enabled but non-positive, or it disables ramBufferSize + when maxBufferedDocs is already disabled + +
+ + Returns the value set by {@link #setRAMBufferSizeMB} if enabled. + + +

Determines the minimal number of delete terms required before the buffered + in-memory delete terms are applied and flushed. If there are documents + buffered in memory at the time, they are merged and a new segment is + created.

+

Disabled by default (writer flushes by RAM usage).

+ +

+ IllegalArgumentException if maxBufferedDeleteTerms + is enabled but smaller than 1 + + + +
+ + Returns the number of buffered deleted terms that will + trigger a flush if enabled. + + + + + + Determines how often segment indices are merged by addDocument(). With + smaller values, less RAM is used while indexing, and searches on + unoptimized indices are faster, but indexing speed is slower. With larger + values, more RAM is used during indexing, and while searches on unoptimized + indices are slower, indexing is faster. Thus larger values (> 10) are best + for batch index creation, and smaller values (< 10) for indices that are + interactively maintained. + +

Note that this method is a convenience method: it + just calls mergePolicy.setMergeFactor as long as + mergePolicy is an instance of {@link LogMergePolicy}. + Otherwise an IllegalArgumentException is thrown.

+ +

This must never be less than 2. The default value is 10. +

+
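+
+ A sketch of adjusting the merge factor for a bulk-indexing run (10 is the default;
+ values below 2 are rejected):
+
+            writer.SetMergeFactor(30);
+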
+ +

Returns the number of segments that are merged at + once and also controls the total number of segments + allowed to accumulate in the index.

+ +

Note that this method is a convenience method: it + just calls mergePolicy.getMergeFactor as long as + mergePolicy is an instance of {@link LogMergePolicy}. + Otherwise an IllegalArgumentException is thrown.

+ +

+ + +
+ + Expert: returns max delay inserted before syncing a + commit point. On Windows, at least, pausing before + syncing can increase net indexing throughput. The + delay is variable based on size of the segment's files, + and is only inserted when using + ConcurrentMergeScheduler for merges. + + This will be removed in 3.0, when + autoCommit=true is removed from IndexWriter. + + + + Expert: sets the max delay before syncing a commit + point. + + + + This will be removed in 3.0, when + autoCommit=true is removed from IndexWriter. + + + + If non-null, this will be the default infoStream used + by a newly instantiated IndexWriter. + + + + + + Returns the current default infoStream for newly + instantiated IndexWriters. + + + + + + If non-null, information about merges, deletes and a + message when maxFieldLength is reached will be printed + to this. + + + + Returns the current infoStream in use by this writer. + + + + + Returns true if verbosing is enabled (i.e., infoStream != null). + + + to change the default value for all instances of IndexWriter. + + + + Returns allowed timeout when acquiring the write lock. + + + + + Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in + milliseconds). + + + + Returns default write lock timeout for newly + instantiated IndexWriters. + + + + + + Commits all changes to an index and closes all + associated files. Note that this may be a costly + operation, so, try to re-use a single writer instead of + closing and opening a new one. See {@link #Commit()} for + caveats about write caching done by some IO devices. + +

If an Exception is hit during close, eg due to disk + full or some other reason, then both the on-disk index + and the internal state of the IndexWriter instance will + be consistent. However, the close will not be complete + even though part of it (flushing buffered documents) + may have succeeded, so the write lock will still be + held.

+ +

If you can correct the underlying cause (eg free up + some disk space) then you can call close() again. + Failing that, if you want to force the write lock to be + released (dangerous, because you may then lose buffered + docs in the IndexWriter instance) then you can do + something like this:

+ +

+            try {
+              writer.Close();
+            } finally {
+              if (IndexWriter.IsLocked(directory)) {
+                IndexWriter.Unlock(directory);
+              }
+            }
+            
+ + after which, you must be certain not to use the writer + instance anymore.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Closes the index with or without waiting for currently + running merges to finish. This is only meaningful when + using a MergeScheduler that runs merges in background + threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details.

+ +

NOTE: it is dangerous to always call + close(false), especially when IndexWriter is not open + for very long, because this can result in "merge + starvation" whereby long merges will never have a + chance to finish. This will cause too many segments in + your index over time.

+ +

+ if true, this call will block + until all merges complete; else, it will ask all + running merges to abort, wait until those merges have + finished (which should be at most a few seconds), and + then return. + +
+
+ Tells the docWriter to close its currently open shared
+ doc stores (stored fields & vectors files).
+ The return value specifies whether the new doc store files are compound or not.
+
+
+ Returns the Directory used by this index.
+
+
+ Returns the analyzer used by this index.
+
+
+ Returns the number of documents currently in this
+ index, not counting deletions.
+
+ Please use {@link #MaxDoc()} (same as this
+ method) or {@link #NumDocs()} (also takes deletions
+ into account), instead.
+
+
+
+ Returns total number of docs in this index, including
+ docs not yet flushed (still in the RAM buffer),
+ not counting deletions.
+
+
+
+
+
+ Returns total number of docs in this index, including
+ docs not yet flushed (still in the RAM buffer), and
+ including deletions. NOTE: buffered deletions
+ are not counted. If you really need these to be
+ counted you should call {@link #Commit()} first.
+
+
+
+
+
+ The maximum number of terms that will be indexed for a single field in a
+ document. This limits the amount of memory required for indexing, so that
+ collections with very large files will not crash the indexing process by
+ running out of memory.

+ Note that this effectively truncates large documents, excluding from the
+ index terms that occur further in the document. If you know your source
+ documents are large, be sure to set this value high enough to accommodate
+ the expected size. If you set it to Integer.MAX_VALUE, then the only limit
+ is your memory, but you should anticipate an OutOfMemoryError.

+ By default, no more than 10,000 terms will be indexed for a field. + +

+ + +
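+
+ A sketch of raising the per-field term cap beyond the 10,000-term default (a very
+ large value effectively removes the cap, at the memory cost described above):
+
+            writer.SetMaxFieldLength(100000);
+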
+ + Adds a document to this index. If the document contains more than + {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are + discarded. + +

Note that if an Exception is hit (for example disk full) + then the index will be consistent, but this document + may not have been added. Furthermore, it's possible + the index will have one segment in non-compound format + even when using compound files (when a merge has + partially succeeded).

+ +

This method periodically flushes pending documents + to the Directory (see above), and + also periodically triggers segment merges in the index + according to the {@link MergePolicy} in use.

+ +

Merges temporarily consume space in the + directory. The amount of space required is up to 1X the + size of all segments being merged, when no + readers/searchers are open against the index, and up to + 2X the size of all segments being merged when + readers/searchers are open against the index (see + {@link #Optimize()} for details). The sequence of + primitive merge operations performed is governed by the + merge policy. + +

Note that each term in the document can be no longer + than 16383 characters, otherwise an + IllegalArgumentException will be thrown.

+ +

Note that it's possible to create an invalid Unicode + string in java if a UTF16 surrogate pair is malformed. + In this case, the invalid characters are silently + replaced with the Unicode replacement character + U+FFFD.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Adds a document to this index, using the provided analyzer instead of the + value of {@link #GetAnalyzer()}. If the document contains more than + {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are + discarded. + +

See {@link #AddDocument(Document)} for details on + index and IndexWriter state after an Exception, and + flushing/merging temporary free space requirements.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) containing term. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the documents to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) containing any of the + terms. All deletes are flushed at the same time. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ array of terms to identify the documents + to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) matching the provided query. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the query to identify the documents to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Deletes the document(s) matching any of the provided queries. + All deletes are flushed at the same time. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ array of queries to identify the documents + to be deleted + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Updates a document by first deleting the document(s) + containing term and then adding the new + document. The delete and then add are atomic as seen + by a reader on the same index (flush may happen only after + the add). + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the document(s) to be + deleted + + the document to be added + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Updates a document by first deleting the document(s) + containing term and then adding the new + document. The delete and then add are atomic as seen + by a reader on the same index (flush may happen only after + the add). + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ the term to identify the document(s) to be + deleted + + the document to be added + + the analyzer to use when analyzing the document + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+
+ If non-null, information about merges will be printed to this.
+
+
+ Requests an "optimize" operation on an index, priming the index
+ for the fastest available search. Traditionally this has meant
+ merging all segments into a single segment as is done in the
+ default merge policy, but individual merge policies may implement
+ optimize in different ways.
+
+

It is recommended that this method be called upon completion of indexing. In + environments with frequent updates, optimize is best done during low volume times, if at all. + +

+

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

+ +

Note that optimize requires 2X the index size free + space in your Directory. For example, if your index + size is 10 MB then you need 20 MB free for optimize to + complete.

+ +

If some but not all readers re-open while an + optimize is underway, this will cause > 2X temporary + space to be consumed as those new readers will then + hold open the partially optimized segments at that + time. It is best not to re-open readers while optimize + is running.

+ +

The actual temporary usage could be much less than + these figures (it depends on many factors).

+ +

In general, once the optimize completes, the total size of the + index will be less than the size of the starting index. + It could be quite a bit smaller (if there were many + pending deletes) or just slightly smaller.

+ +

If an Exception is hit during optimize(), for example + due to disk full, the index will not be corrupt and no + documents will have been lost. However, it may have + been partially optimized (some segments were merged but + not all), and it's possible that one of the segments in + the index will be in non-compound format even when + using compound file format. This will occur when the + Exception is hit during conversion of the segment into + compound format.

+ +

This call will optimize those segments present in + the index when the call started. If other threads are + still adding documents and flushing segments, those + newly created segments will not be optimized unless you + call optimize again.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + +
+ + Optimize the index down to <= maxNumSegments. If + maxNumSegments==1 then this is the same as {@link + #Optimize()}. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ maximum number of segments left + in the index after optimization finishes + +
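+
+ A sketch of both variants (remember that optimize needs roughly 2X the index size
+ in free disk space, as described above):
+
+            writer.Optimize();      // merge down to a single segment
+            // or, cheaper: stop once at most 5 segments remain
+            writer.Optimize(5);
+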
+ + Just like {@link #Optimize()}, except you can specify + whether the call should block until the optimize + completes. This is only meaningful with a + {@link MergeScheduler} that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Just like {@link #Optimize(int)}, except you can + specify whether the call should block until the + optimize completes. This is only meaningful with a + {@link MergeScheduler} that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Returns true if any merges in pendingMerges or + runningMerges are optimization merges. + + + + Just like {@link #ExpungeDeletes()}, except you can + specify whether the call should block until the + operation completes. This is only meaningful with a + {@link MergeScheduler} that is able to run merges in + background threads. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Expunges all deletes from the index. When an index + has many document deletions (or updates to existing + documents), it's best to either call optimize or + expungeDeletes to remove all unused data in the index + associated with the deleted documents. To see how + many deletions you have pending in your index, call + {@link IndexReader#numDeletedDocs} + This saves disk space and memory usage while + searching. expungeDeletes should be somewhat faster + than optimize since it does not insist on reducing the + index to a single segment (though, this depends on the + {@link MergePolicy}; see {@link + MergePolicy#findMergesToExpungeDeletes}.). Note that + this call does not first commit any buffered + documents, so you must do so yourself if necessary. + See also {@link #ExpungeDeletes(boolean)} + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+
+ Expert: asks the mergePolicy whether any merges are
+ necessary now and if so, runs the requested merges and
+ then iterates (testing again whether merges are needed) until no
+ more merges are returned by the mergePolicy.
+
+ Explicit calls to maybeMerge() are usually not
+ necessary. The most common case is when merge policy
+ parameters have changed.
+

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
+ + Expert: the {@link MergeScheduler} calls this method + to retrieve the next merge requested by the + MergePolicy + + + + Like getNextMerge() except only returns a merge if it's + external. + + + + Please use {@link #rollback} instead. + + + + Close the IndexWriter without committing + any changes that have occurred since the last commit + (or since it was opened, if commit hasn't been called). + This removes any temporary files that had been created, + after which the state of the index will be the same as + it was when commit() was last called or when this + writer was first opened. This can only be called when + this IndexWriter was opened with + autoCommit=false. This also clears a + previous call to {@link #prepareCommit}. + + IllegalStateException if this is called when + the writer was opened with autoCommit=true. + + IOException if there is a low-level IO error + + + Delete all documents in the index. + +

This method will drop all buffered documents and will + remove all segments from the index. This change will not be + visible until a {@link #Commit()} has been called. This method + can be rolled back using {@link #Rollback()}.

+ +

NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).

+ +

NOTE: this method will forcefully abort all merges + in progress. If other threads are running {@link + #Optimize()} or any of the addIndexes methods, they + will receive {@link MergePolicy.MergeAbortedException}s. +

+
+ + Wait for any currently outstanding merges to finish. + +

It is guaranteed that any merges started prior to calling this method + will have completed once this method completes.

+

+
+ + Merges all segments from an array of indexes into this index. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ Use {@link #addIndexesNoOptimize} instead, + then separately call {@link #optimize} afterwards if + you need to. + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Merges all segments from an array of indexes into this + index. + +

This may be used to parallelize batch indexing. A large document + collection can be broken into sub-collections. Each sub-collection can be + indexed in parallel, on a different thread, process or machine. The + complete index can then be created by merging sub-collection indexes + with this method. + +

NOTE: the index in each Directory must not be + changed (opened by a writer) while this method is + running. This method does not acquire a write lock in + each input Directory, so it is up to the caller to + enforce this. + +

NOTE: while this is running, any attempts to + add or delete documents (with another thread) will be + paused until this method completes. + +

This method is transactional in how Exceptions are + handled: it does not commit a new segments_N file until + all indexes are added. This means if an Exception + occurs (for example disk full), then either no indexes + will have been added or they all will have been.

+ +

Note that this requires temporary free space in the + Directory up to 2X the sum of all input indexes + (including the starting index). If readers/searchers + are open against the starting index, then temporary + free space required will be higher by the size of the + starting index (see {@link #Optimize()} for details). +

+ +

Once this completes, the final size of the index + will be less than the sum of all input index sizes + (including the starting index). It could be quite a + bit smaller (if there were many pending deletes) or + just slightly smaller.

+ +

+ This requires this index not be among those to be added. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
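+
+ A sketch of merging separately built sub-indexes (dir1 and dir2 are hypothetical
+ Directory instances that no writer currently has open):
+
+            writer.AddIndexesNoOptimize(new Directory[] { dir1, dir2 });
+            writer.Optimize();      // optional, if a fully merged index is wanted
+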
+ + Merges the provided indexes into this index. +

After this completes, the index is optimized.

+

The provided IndexReaders are not closed.

+ +

NOTE: while this is running, any attempts to + add or delete documents (with another thread) will be + paused until this method completes. + +

See {@link #AddIndexesNoOptimize(Directory[])} for + details on transactional semantics, temporary free + space required in the Directory, and non-CFS segments + on an Exception.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Flush all in-memory buffered updates (adds and deletes) + to the Directory. +

Note: while this will force buffered docs to be + pushed into the index, it will not make these docs + visible to a reader. Use {@link #Commit()} instead + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ please call {@link #Commit()}) instead + + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error +
+ + Expert: prepare for commit. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ + +
+ +

Expert: prepare for commit, specifying a commitUserData Map (String -> String). This does the first phase of a 2-phase commit. You can only call this when autoCommit is false. This method does all steps necessary to commit changes since this writer was opened: it flushes pending added and deleted docs, syncs the index files, and writes most of the next segments_N file. After calling this you must call either {@link #Commit()} to finish the commit, or {@link #Rollback()} to revert the commit and undo all changes done since the writer was opened.

+ + You can also just call {@link #Commit(Map)} directly + without prepareCommit first in which case that method + will internally call prepareCommit. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ Opaque Map (String->String) + that's recorded into the segments file in the index, + and retrievable by {@link + IndexReader#getCommitUserData}. Note that when + IndexWriter commits itself, for example if open with + autoCommit=true, or, during {@link #close}, the + commitUserData is unchanged (just carried over from + the prior commit). If this is null then the previous + commitUserData is kept. Also, the commitUserData will + only "stick" if there are actually changes in the + index to commit. Therefore it's best to use this + feature only when autoCommit is false. + +
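+ A minimal two-phase-commit sketch, assuming autoCommit=false and the Commit()/Rollback() methods described here; the coordinated external resource is hypothetical:
+            try {
+              writer.prepareCommit();   // phase 1: flush, sync files, write the pending segments_N
+              // ... commit the coordinated external resource (e.g. a database transaction) here ...
+              writer.commit();          // phase 2: make the new segments_N visible to readers
+            } catch (IOException ioe) {
+              writer.rollback();        // discard all changes made since the writer was opened
+              throw ioe;
+            }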
+ +

Commits all pending changes (added & deleted + documents, optimizations, segment merges, added + indexes, etc.) to the index, and syncs all referenced + index files, such that a reader will see the changes + and the index updates will survive an OS or machine + crash or power loss. Note that this does not wait for + any running background merges to finish. This may be a + costly operation, so you should test the cost in your + application and do it only when really necessary.

+ +

Note that this operation calls Directory.sync on + the index files. That call should not return until the + file contents & metadata are on stable storage. For + FSDirectory, this calls the OS's fsync. But, beware: + some hardware devices may in fact cache writes even + during fsync, and return before the bits are actually + on stable storage, to give the appearance of faster + performance. If you have such a device, and it does + not have a battery backup (for example) then on power + loss it may still lose data. Lucene cannot guarantee + consistency on such devices.

+ +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+ +

+ + + + +
+ + Commits all changes to the index, specifying a + commitUserData Map (String -> String). This just + calls {@link #PrepareCommit(Map)} (if you didn't + already call it) and then {@link #finishCommit}. + +

NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer. See above for details.

+

+
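+ A small sketch of attaching commitUserData and reading it back later, assuming the IndexReader#getCommitUserData method referenced above; the key/value pair and the directory variable are illustrative:
+            Map commitData = new HashMap();                 // String -> String
+            commitData.put("indexedAt", "2009-10-01");      // illustrative entry
+            writer.commit(commitData);
+            
+            // Later, without opening a writer:
+            Map stored = IndexReader.getCommitUserData(directory);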
+ + Flush all in-memory buffered udpates (adds and deletes) + to the Directory. + + if true, we may merge segments (if + deletes or docs were flushed) if necessary + + if false we are allowed to keep + doc stores open to share with the next segment + + whether pending deletes should also + be flushed + + + + Expert: Return the total size of all index files currently cached in memory. + Useful for size management with flushRamDocs() + + + + Expert: Return the number of documents currently + buffered in RAM. + + + + Carefully merges deletes for the segments we just + merged. This is tricky because, although merging will + clear all deletes (compacts the documents), new + deletes may have been flushed to the segments since + the merge was started. This method "carries over" + such new deletes onto the newly merged segment, and + saves the resulting deletes file (incrementing the + delete generation for merge.info). If no deletes were + flushed, no new deletes file is saved. + + + + Merges the indicated segments, replacing them in the stack with a + single segment. + + + + Hook that's called when the specified merge is complete. + + + Checks whether this merge involves any segments + already participating in a merge. If not, this merge + is "registered", meaning we record that its segments + are now participating in a merge, and true is + returned. Else (the merge conflicts) false is + returned. + + + + Does initial setup for a merge, which is fast but holds + the synchronized lock on IndexWriter instance. + + + + This is called after merging a segment and before + building its CFS. Return true if the files should be + sync'd. If you return false, then the source segment + files that were merged cannot be deleted until the CFS + file is built & sync'd. So, returning false consumes + more transient disk space, but saves performance of + not having to sync files which will shortly be deleted + anyway. + + -- this will be removed in 3.0 when + autoCommit is hardwired to false + + + + Does fininishing for a merge, which is fast but holds + the synchronized lock on IndexWriter instance. + + + + Does the actual (time-consuming) work of the merge, + but without holding synchronized lock on IndexWriter + instance + + + + Blocks until all files in syncing are sync'd + + + Pauses before syncing. On Windows, at least, it's + best (performance-wise) to pause in order to let OS + flush writes to disk on its own, before forcing a + sync. + + -- this will be removed in 3.0 when + autoCommit is hardwired to false + + + + Walk through all files referenced by the current + segmentInfos and ask the Directory to sync each file, + if it wasn't already. If that succeeds, then we + prepare a new segments_N file but do not fully commit + it. + + + + Returns true iff the index in the named directory is + currently locked. + + the directory to check for a lock + + IOException if there is a low-level IO error + + + Returns true iff the index in the named directory is + currently locked. + + the directory to check for a lock + + IOException if there is a low-level IO error + Use {@link #IsLocked(Directory)} + + + + Forcibly unlocks the index in the named directory. +

+ Caution: this should only be used by failure recovery code, + when it is known that no other process nor thread is in fact + currently accessing this index. +

+
+ + Set the merged segment warmer. See {@link + IndexReaderWarmer}. + + + + Returns the current merged segment warmer. See {@link + IndexReaderWarmer}. + + + + Deprecated: emulates IndexWriter's buggy behavior when + first token(s) have positionIncrement==0 (ie, prior to + fixing LUCENE-1542) + + + + Holds shared SegmentReader instances. IndexWriter uses + SegmentReaders for 1) applying deletes, 2) doing + merges, 3) handing out a real-time reader. This pool + reuses instances of the SegmentReaders in all these + places if it is in "near real-time mode" (getReader() + has been called on this instance). + + + + Forcefully clear changes for the specifed segments, + and remove from the pool. This is called on succesful merge. + + + + Release the segment reader (i.e. decRef it and close if there + are no more references. + + + + IOException + + + Release the segment reader (i.e. decRef it and close if there + are no more references. + + + + IOException + + + Remove all our references to readers, and commits + any pending changes. + + + + Commit all segment reader in the pool. + IOException + + + Returns a ref to a clone. NOTE: this clone is not + enrolled in the pool, so you should simply close() + it when you're done (ie, do not call release()). + + + + Obtain a SegmentReader from the readerPool. The reader + must be returned by calling {@link #Release(SegmentReader)} + + + + + + + + IOException + + + Obtain a SegmentReader from the readerPool. The reader + must be returned by calling {@link #Release(SegmentReader)} + + + + + + + + + + + + + IOException + + + Specifies maximum field length (in number of tokens/terms) in {@link IndexWriter} constructors. + {@link #SetMaxFieldLength(int)} overrides the value set by + the constructor. + + + + Private type-safe-enum-pattern constructor. + + + instance name + + maximum field length + + + + Public constructor to allow users to specify the maximum field size limit. + + + The maximum field length + + + + Sets the maximum field length to {@link Integer#MAX_VALUE}. + + + Sets the maximum field length to + {@link #DEFAULT_MAX_FIELD_LENGTH} + + + + + If {@link #getReader} has been called (ie, this writer + is in near real-time mode), then after a merge + completes, this class can be invoked to warm the + reader on the newly merged segment, before the merge + commits. This is not required for near real-time + search, but will reduce search latency on opening a + new near real-time reader after a merge completes. + +

NOTE: This API is experimental and might + change in incompatible ways in the next release.

+ +

NOTE: warm is called before any deletes have + been carried over to the merged segment. +

+
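+ A hedged sketch of installing the merged segment warmer described above, for near real-time readers; the warm-up query and field name are illustrative only:
+            writer.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
+              public void warm(IndexReader reader) throws IOException {
+                // Touch the newly merged segment so the first NRT reader after the merge is fast,
+                // e.g. by running a cheap representative query against it.
+                new IndexSearcher(reader).search(new TermQuery(new Term("type", "warmup")), 1);
+              }
+            });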
+ + Filename filter that accept filenames and extensions only created by Lucene. + + + $rcs = ' $Id: Exp $ ' ; + + + + Returns true if this is a file that would be contained + in a CFS file. This function should only be called on + files that pass the above "accept" (ie, are already + known to be a Lucene index file). + + + + Change to true to see details of reference counts when + infoStream != null + + + + Initialize the deleter: find all previous commits in + the Directory, incref the files they reference, call + the policy to let it delete commits. This will remove + any files not referenced by any of the commits. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + + + Remove the CommitPoints in the commitsToDelete List by + DecRef'ing all files from each SegmentInfos. + + + + Writer calls this when it has hit an error and had to + roll back, to tell us that there may now be + unreferenced files in the filesystem. So we re-list + the filesystem and delete such files. If segmentName + is non-null, we will only delete files corresponding to + that segment. + + + + For definition of "check point" see IndexWriter comments: + "Clarification: Check Points (and commits)". + + Writer calls this when it has made a "consistent + change" to the index, meaning new files are written to + the index and the in-memory SegmentInfos have been + modified to point to those files. + + This may or may not be a commit (segments_N may or may + not have been written). + + We simply incref the files referenced by the new + SegmentInfos and decref the files we had previously + seen (if any). + + If this is a commit, we also call the policy to give it + a chance to remove other commits. If any commits are + removed, we decref their files as well. + + + + Deletes the specified files, but only if they are new + (have not yet been incref'd). + + + + Tracks the reference count for a single index file: + + + Holds details for each commit point. This class is + also passed to the deletion policy. Note: this class + has a natural ordering that is inconsistent with + equals. + + + + Called only be the deletion policy, to remove this + commit point from the index. + + + + Add a new position & payload + + + Called when we are done adding positions & payloads + + + Abstract API that consumes terms, doc, freq, prox and + payloads postings. Concrete implementations of this + actually do "something" with the postings (write it into + the index in a specific format). + + NOTE: this API is experimental and will likely change + + + + Add a new field + + + Called when we are done adding everything. + + + Add a new field + + + Called when we are done adding everything. + + + Holds buffered deletes, by docID, term or query. We + hold two instances of this class: one for the deletes + prior to the last flush, the other for deletes after + the last flush. This is so if we need to abort + (discard all buffered docs) we can also discard the + buffered deletes yet keep the deletes done during + previously flushed segments. + + + + A Token is an occurrence of a term from the text of a field. It consists of + a term's text, the start and end offset of the term in the text of the field, + and a type string. +

+ The start and end offsets permit applications to re-associate a token with + its source text, e.g., to display highlighted query terms in a document + browser, or to show matching text fragments in a KWIC display, etc. +

+ The type is a string, assigned by a lexical analyzer + (a.k.a. tokenizer), naming the lexical or syntactic class that the token + belongs to. For example an end of sentence marker token might be implemented + with type "eos". The default token type is "word". +

+ A Token can optionally have metadata (a.k.a. Payload) in the form of a variable + length byte array. Use {@link TermPositions#GetPayloadLength()} and + {@link TermPositions#GetPayload(byte[], int)} to retrieve the payloads from the index. +

+

+
+

NOTE: As of 2.9, Token implements all {@link Attribute} interfaces + that are part of core Lucene and can be found in the {@code tokenattributes} subpackage. + Even though it is not necessary to use Token anymore, with the new TokenStream API it can + be used as convenience class that implements all {@link Attribute}s, which is especially useful + to easily switch from the old to the new TokenStream API. +

+

+

NOTE: As of 2.3, Token stores the term text + internally as a malleable char[] termBuffer instead of + String termText. The indexing code and core tokenizers + have been changed to re-use a single Token instance, changing + its buffer and other fields in-place as the Token is + processed. This provides substantially better indexing + performance as it saves the GC cost of new'ing a Token and + String for every term. The APIs that accept String + termText are still available but a warning about the + associated performance cost has been added (below). The + {@link #TermText()} method has been deprecated.

+

+

Tokenizers and TokenFilters should try to re-use a Token instance when + possible for best performance, by implementing the + {@link TokenStream#IncrementToken()} API. + Failing that, to create a new Token you should first use + one of the constructors that starts with null text. To load + the token from a char[] use {@link #SetTermBuffer(char[], int, int)}. + To load from a String use {@link #SetTermBuffer(String)} or {@link #SetTermBuffer(String, int, int)}. + Alternatively you can get the Token's termBuffer by calling either {@link #TermBuffer()}, + if you know that your text is shorter than the capacity of the termBuffer + or {@link #ResizeTermBuffer(int)}, if there is any possibility + that you may need to grow the buffer. Fill in the characters of your term into this + buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string, + or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #SetTermLength(int)} to + set the length of the term text. See LUCENE-969 + for details.

+

Typical Token reuse patterns: +

    +
  • Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not + specified):
    +
    +            return reusableToken.reinit(string, startOffset, endOffset[, type]);
    +            
    +
  • +
  • Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} + if not specified):
    +
    +            return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
    +            
    +
  • +
  • Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} + if not specified):
    +
    +            return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
    +            
    +
  • +
  • Copying some text from a char[] buffer (type is reset to + {@link #DEFAULT_TYPE} if not specified):
    +
    +            return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
    +            
    +
  • +
  • Copying from one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):
    +
    +            return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
    +            
    +
  • +
+ A few things to note: +
    +
  • clear() initializes all of the fields to default values. This behavior changed relative to Lucene 2.4, but should affect no one.
  • +
  • Because TokenStreams can be chained, one cannot assume that the Token's current type is correct.
  • +
  • The startOffset and endOffset represent the start and end offset of the token in the source text, so be careful when adjusting them.
  • +
  • When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.
  • +
+

+

+ + +
+ + We will remove this when we remove the + deprecated APIs + + + + Characters for the term text. + This will be made private. Instead, use: + {@link #TermBuffer()}, + {@link #SetTermBuffer(char[], int, int)}, + {@link #SetTermBuffer(String)}, or + {@link #SetTermBuffer(String, int, int)} + + + + Length of term text in the buffer. + This will be made private. Instead, use: + {@link #TermLength()}, or @{link setTermLength(int)}. + + + + Start in source text. + This will be made private. Instead, use: + {@link #StartOffset()}, or @{link setStartOffset(int)}. + + + + End in source text. + This will be made private. Instead, use: + {@link #EndOffset()}, or @{link setEndOffset(int)}. + + + + The lexical type of the token. + This will be made private. Instead, use: + {@link #Type()}, or @{link setType(String)}. + + + + This will be made private. Instead, use: + {@link #GetPayload()}, or @{link setPayload(Payload)}. + + + + This will be made private. Instead, use: + {@link #GetPositionIncrement()}, or @{link setPositionIncrement(String)}. + + + + Constructs a Token will null text. + + + Constructs a Token with null text and start & end + offsets. + + start offset in the source text + + end offset in the source text + + + + Constructs a Token with null text and start & end + offsets plus the Token type. + + start offset in the source text + + end offset in the source text + + the lexical type of this Token + + + + Constructs a Token with null text and start & end + offsets plus flags. NOTE: flags is EXPERIMENTAL. + + start offset in the source text + + end offset in the source text + + The bits to set for this token + + + + Constructs a Token with the given term text, and start + & end offsets. The type defaults to "word." + NOTE: for better indexing speed you should + instead use the char[] termBuffer methods to set the + term text. + + term text + + start offset + + end offset + + + + Constructs a Token with the given text, start and end + offsets, & type. NOTE: for better indexing + speed you should instead use the char[] termBuffer + methods to set the term text. + + term text + + start offset + + end offset + + token type + + + + Constructs a Token with the given text, start and end + offsets, & type. NOTE: for better indexing + speed you should instead use the char[] termBuffer + methods to set the term text. + + + + + + + + token type bits + + + + Constructs a Token with the given term buffer (offset + & length), start and end + offsets + + + + + + + + + + + + + + Set the position increment. This determines the position of this token + relative to the previous Token in a {@link TokenStream}, used in phrase + searching. + +

The default value is one. + +

Some common uses for this are:

    + +
  • Set it to zero to put multiple terms in the same position. This is + useful if, e.g., a word has multiple stems. Searches for phrases + including either stem will match. In this case, all but the first stem's + increment should be set to zero: the increment of the first instance + should be one. Repeating a token with an increment of zero can also be + used to boost the scores of matches on that token.
  • + +
  • Set it to values greater than one to inhibit exact phrase matches. + If, for example, one does not want phrases to match across removed stop + words, then one could build a stop word filter that removes stop words and + also sets the increment to the number of stop words removed before each + non-stop word. Then exact phrase queries will only match when the terms + occur with no intervening stop words.
  • + +
+
+ the distance from the prior term + + + +
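+ A small illustration of both uses of the position increment, with made-up term texts and offsets (for production code prefer the char[] termBuffer methods, per the note above):
+            Token original = new Token("run", 0, 3);          // term "run", start/end offsets 0 and 3
+            Token synonym = new Token("sprint", 0, 3);
+            synonym.setPositionIncrement(0);                   // stacked on the same position as "run"
+            
+            Token afterStopWords = new Token("fox", 10, 13);
+            afterStopWords.setPositionIncrement(3);            // gap left by removed stop words, to inhibit phrase matches across them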
+ + Returns the position increment of this Token. + + + + + Sets the Token's term text. NOTE: for better + indexing speed you should instead use the char[] + termBuffer methods to set the term text. + + use {@link #SetTermBuffer(char[], int, int)} or + {@link #SetTermBuffer(String)} or + {@link #SetTermBuffer(String, int, int)}. + + + + Returns the Token's term text. + + + This method now has a performance penalty + because the text is stored internally in a char[]. If + possible, use {@link #TermBuffer()} and {@link + #TermLength()} directly instead. If you really need a + String, use {@link #Term()} + + + + Returns the Token's term text. + + This method has a performance penalty + because the text is stored internally in a char[]. If + possible, use {@link #TermBuffer()} and {@link + #TermLength()} directly instead. If you really need a + String, use this method, which is nothing more than + a convenience call to new String(token.termBuffer(), 0, token.termLength()) + + + + Copies the contents of buffer, starting at offset for + length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Copies the contents of buffer into the termBuffer array. + the buffer to copy + + + + Copies the contents of buffer, starting at offset and continuing + for length characters, into the termBuffer array. + + the buffer to copy + + the index in the buffer of the first character to copy + + the number of characters to copy + + + + Returns the internal termBuffer character array which + you can then directly alter. If the array is too + small for your token, use {@link + #ResizeTermBuffer(int)} to increase it. After + altering the buffer be sure to call {@link + #setTermLength} to record the number of valid + characters that were placed into the termBuffer. + + + + Grows the termBuffer to at least size newSize, preserving the + existing content. Note: If the next operation is to change + the contents of the term buffer use + {@link #SetTermBuffer(char[], int, int)}, + {@link #SetTermBuffer(String)}, or + {@link #SetTermBuffer(String, int, int)} + to optimally combine the resize with the setting of the termBuffer. + + minimum size of the new termBuffer + + newly created termBuffer with length >= newSize + + + + Allocates a buffer char[] of at least newSize, without preserving the existing content. + its always used in places that set the content + + minimum size of the buffer + + + + Return number of valid characters (length of the term) + in the termBuffer array. + + + + Set number of valid characters (length of the term) in + the termBuffer array. Use this to truncate the termBuffer + or to synchronize with external manipulation of the termBuffer. + Note: to grow the size of the array, + use {@link #ResizeTermBuffer(int)} first. + + the truncated length + + + + Returns this Token's starting offset, the position of the first character + corresponding to this token in the source text. + Note that the difference between endOffset() and startOffset() may not be + equal to termText.length(), as the term text may have been altered by a + stemmer or some other filter. + + + + Set the starting offset. + + + + + Returns this Token's ending offset, one greater than the position of the + last character corresponding to this token in the source text. The length + of the token in the source text is (endOffset - startOffset). + + + + Set the ending offset. + + + + + Set the starting and ending offset. 
+ See StartOffset() and EndOffset() + + + + Returns this Token's lexical type. Defaults to "word". + + + Set the lexical type. + + + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from {@link #Type()}, although they do share similar purposes. + The flags can be used to encode information about the token for use by other {@link Lucene.Net.Analysis.TokenFilter}s. + + +

+ The bits + +
+ + + + + + Returns this Token's payload. + + + Sets this Token's payload. + + + Resets the term text, payload, flags, and positionIncrement, + startOffset, endOffset and token type to default. + + + + Makes a clone, but replaces the term buffer & + start/end offset in the process. This is more + efficient than doing a full clone (and then calling + setTermBuffer) because it saves a wasted copy of the old + termBuffer. + + + + Shorthand for calling {@link #clear}, + {@link #SetTermBuffer(char[], int, int)}, + {@link #setStartOffset}, + {@link #setEndOffset}, + {@link #setType} + + this Token instance + + + + Shorthand for calling {@link #clear}, + {@link #SetTermBuffer(char[], int, int)}, + {@link #setStartOffset}, + {@link #setEndOffset} + {@link #setType} on Token.DEFAULT_TYPE + + this Token instance + + + + Shorthand for calling {@link #clear}, + {@link #SetTermBuffer(String)}, + {@link #setStartOffset}, + {@link #setEndOffset} + {@link #setType} + + this Token instance + + + + Shorthand for calling {@link #clear}, + {@link #SetTermBuffer(String, int, int)}, + {@link #setStartOffset}, + {@link #setEndOffset} + {@link #setType} + + this Token instance + + + + Shorthand for calling {@link #clear}, + {@link #SetTermBuffer(String)}, + {@link #setStartOffset}, + {@link #setEndOffset} + {@link #setType} on Token.DEFAULT_TYPE + + this Token instance + + + + Shorthand for calling {@link #clear}, + {@link #SetTermBuffer(String, int, int)}, + {@link #setStartOffset}, + {@link #setEndOffset} + {@link #setType} on Token.DEFAULT_TYPE + + this Token instance + + + + Copy the prototype token's fields into this one. Note: Payloads are shared. + + + + + Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + + + + + + + Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + + + + + + + + + + + A simple class that stores Strings as char[]'s in a + hash table. Note that this is not a general purpose + class. For example, it cannot remove items from the + set, nor does it resize its hash table to be smaller, + etc. It is designed to be quick to test if a char[] + is in the set without the necessity of converting it + to a String first. + + + + Create set with enough capacity to hold startSize + terms + + + + Create set from a Collection of char[] or String + + + Create set from entries + + + true if the len chars of text starting at off + are in the set + + + + true if the System.String is in the set + + + Returns true if the String is in the set + + + Add this String into the set + + + Add this char[] directly to the set. + If ignoreCase is true for this Set, the text array will be directly modified. + The user should never modify this text array after calling this method. + + + + Returns an unmodifiable {@link CharArraySet}. This allows to provide + unmodifiable views of internal sets for "read-only" use. + + + a set for which the unmodifiable set is returned. + + an new unmodifiable {@link CharArraySet}. + + NullPointerException + if the given set is null. + + + + Adds all of the elements in the specified collection to this collection + + + Removes all elements from the set + + + Removes from this set all of its elements that are contained in the specified collection + + + Retains only the elements in this set that are contained in the specified collection + + + The Iterator<String> for this set. Strings are constructed on the fly, so + use nextCharArray for more efficient access. 
+ + + + do not modify the returned char[] + + + Returns the next String, as a Set<String> would... + use nextCharArray() for better efficiency. + + + + Efficient unmodifiable {@link CharArraySet}. This implementation does not + delegate calls to a give {@link CharArraySet} like + {@link Collections#UnmodifiableSet(java.util.Set)} does. Instead is passes + the internal representation of a {@link CharArraySet} to a super + constructor and overrides all mutators. + + + + This exception is thrown when you try to list a + non-existent directory. + + + + Use this {@link LockFactory} to disable locking entirely. + This LockFactory is used when you call {@link FSDirectory#setDisableLocks}. + Only one instance of this lock is created. You should call {@link + #GetNoLockFactory()} to get the instance. + + + + + + + Simple standalone server that must be running when you + use {@link VerifyingLockFactory}. This server simply + verifies at most one process holds the lock at a time. + Run without any args to see usage. + + + + + + + + + Writes bytes through to a primary IndexOutput, computing + checksum. Note that you cannot use seek(). + + + + Starts but does not complete the commit of this file (= + writing of the final checksum at the end). After this + is called must call {@link #finishCommit} and the + {@link #close} to complete the commit. + + + + See {@link #prepareCommit} + + + Subclass of FilteredTermEnum for enumerating all terms that match the + specified range parameters. +

+ Term enumerations are always ordered by Term.compareTo(). Each term in + the enumeration is greater than all that precede it. +

+ 2.9 + +
+ + Enumerates all terms greater/equal than lowerTerm + but less/equal than upperTerm. + + If an endpoint is null, it is said to be "open". Either or both + endpoints may be open. Open endpoints may not be exclusive + (you can't select all but the first or last term without + explicitly specifying the term to exclude.) + + + + + An interned field that holds both lower and upper terms. + + The term text at the lower end of the range + + The term text at the upper end of the range + + If true, the lowerTerm is included in the range. + + If true, the upperTerm is included in the range. + + The collator to use to collate index Terms, to determine their + membership in the range bounded by lowerTerm and + upperTerm. + + + IOException + + + Matches the union of its clauses. + + + Construct a SpanOrQuery merging the provided clauses. + + + Return the clauses whose spans are matched. + + + Returns a collection of all terms matched by this query. + use extractTerms instead + + + + + + Similar to {@link NearSpansOrdered}, but for the unordered case. + + Expert: + Only public for subclassing. Most implementations should not need this class + + + + WARNING: The List is not necessarily in order of the the positions + Collection of byte[] payloads + + IOException + + + Wraps a Spans, and can be used to form a linked list. + + + This class is very similar to + {@link Lucene.Net.Search.Spans.SpanTermQuery} except that it factors + in the value of the payload located at each of the positions where the + {@link Lucene.Net.Index.Term} occurs. +

+ In order to take advantage of this, you must override + {@link Lucene.Net.Search.Similarity#ScorePayload(String, byte[],int,int)} + which returns 1 by default. +

+ Payload scores are aggregated using a pluggable {@link PayloadFunction}. + +

+
+ + Public for extension only. + + + not needed anymore + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + use {@link #DocID()} instead. + + + + + {@link #GetSpanScore()} * {@link #GetPayloadScore()} + + IOException + + + Returns the SpanScorer score only. +

+ Should not be overridden without good cause! +

+ the score for just the Span part w/o the payload + + IOException + + + + +
+ + The score for the payload + + + The score, as calculated by + {@link PayloadFunction#DocScore(int, String, int, float)} + + + + Implements search over a single IndexReader. + +

Applications usually need only call the inherited {@link #Search(Query)} + or {@link #Search(Query,Filter)} methods. For performance reasons it is + recommended to open only one IndexSearcher and use it for all of your searches. + +

Note that you can only access Hits from an IndexSearcher as long as it is + not yet closed, otherwise an IOException will be thrown. + +

NOTE: {@link + IndexSearcher} instances are completely + thread safe, meaning multiple threads can call any of its + methods, concurrently. If your application requires + external synchronization, you should not + synchronize on the IndexSearcher instance; + use your own (non-Lucene) objects instead.

+

+
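+ A minimal usage sketch, assuming the read-only constructor mentioned below; the directory variable and field name are illustrative:
+            IndexSearcher searcher = new IndexSearcher(directory, true);   // read-only gives the best concurrency
+            try {
+              TopDocs hits = searcher.search(new TermQuery(new Term("lastname", "smith")), 10);
+              for (int i = 0; i < hits.scoreDocs.length; i++) {
+                Document doc = searcher.doc(hits.scoreDocs[i].doc);
+                // ... use the stored fields of doc ...
+              }
+            } finally {
+              searcher.close();   // in practice, reuse one searcher for many queries and close it once, at the end
+            }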
+ + Creates a searcher searching the index in the named directory. + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #IndexSearcher(Directory, boolean)} instead + + + + Creates a searcher searching the index in the named + directory. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the underlying IndexReader. + + directory where IndexReader will be opened + + if true, the underlying IndexReader + will be opened readOnly + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #IndexSearcher(Directory, boolean)} instead + + + + Creates a searcher searching the index in the provided directory. + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + Use {@link #IndexSearcher(Directory, boolean)} instead + + + + Creates a searcher searching the index in the named + directory. You should pass readOnly=true, since it + gives much better concurrent performance, unless you + intend to do write operations (delete documents or + change norms) with the underlying IndexReader. + + CorruptIndexException if the index is corrupt + IOException if there is a low-level IO error + directory where IndexReader will be opened + + if true, the underlying IndexReader + will be opened readOnly + + + + Creates a searcher searching the provided index. + + + Return the {@link IndexReader} this searches. + + + Note that the underlying IndexReader is not closed, if + IndexSearcher was constructed with IndexSearcher(IndexReader r). + If the IndexReader was supplied implicitly by specifying a directory, then + the IndexReader gets closed. + + + + Just like {@link #Search(Weight, Filter, int, Sort)}, but you choose + whether or not the fields in the returned {@link FieldDoc} instances + should be set by specifying fillFields.
+ +

+ NOTE: this does not compute scores by default. If you need scores, create + a {@link TopFieldCollector} instance by calling + {@link TopFieldCollector#create} and then pass that to + {@link #Search(Weight, Filter, Collector)}. +

+

+
+ + By default, no scores are computed when sorting by field (using + {@link #Search(Query,Filter,int,Sort)}). You can change that, per + IndexSearcher instance, by calling this method. Note that this will incur + a CPU cost. + + + If true, then scores are returned for every matching document + in {@link TopFieldDocs}. + + + If true, then the max score for all matching docs is computed. + + + + Creates a new instance with size elements. If + prePopulate is set to true, the queue will pre-populate itself + with sentinel objects and set its {@link #Size()} to size. In + that case, you should not rely on {@link #Size()} to get the number of + actual elements that were added to the queue, but keep track yourself.
+ NOTE: in case prePopulate is true, you should pop + elements from the queue using the following code example: + +
+            PriorityQueue pq = new HitQueue(10, true); // pre-populate.
+            ScoreDoc top = pq.top();
+            
+            // Add/Update one element.
+            top.score = 1.0f;
+            top.doc = 0;
+            top = (ScoreDoc) pq.updateTop();
+            int totalHits = 1;
+            
+            // Now pop only the elements that were *truly* inserted.
+            // First, pop all the sentinel elements (there are pq.size() - totalHits).
+            for (int i = pq.size() - totalHits; i > 0; i--) pq.pop();
+            
+            // Now pop the truly added elements.
+            ScoreDoc[] results = new ScoreDoc[totalHits];
+            for (int i = totalHits - 1; i >= 0; i--) {
+            results[i] = (ScoreDoc) pq.pop();
+            }
+            
+ +

NOTE: This class pre-allocate a full array of + length size. + +

+ the requested size of this queue. + + specifies whether to pre-populate the queue with sentinel values. + + + +
+ + A clause in a BooleanQuery. + + + The query whose matching documents are combined by the boolean query. + + + Constructs a BooleanClause. + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Specifies how clauses are to occur in matching documents. + + + Use this operator for clauses that must appear in the matching documents. + + + Use this operator for clauses that should appear in the + matching documents. For a BooleanQuery with no MUST + clauses one or more SHOULD clauses must match a document + for the BooleanQuery to match. + + + + + + Use this operator for clauses that must not appear in the matching documents. + Note that it is not possible to search for queries that only consist + of a MUST_NOT clause. + + + +

This class provides a {@link Field} that enables indexing + of numeric values for efficient range filtering and + sorting. Here's an example usage, adding an int value: +

+            document.add(new NumericField(name).setIntValue(value));
+            
+ + For optimal performance, re-use the + NumericField and {@link Document} instance for more than + one document: + +
+            NumericField field = new NumericField(name);
+            Document document = new Document();
+            document.add(field);
+            
+            for(all documents) {
+            ...
+            field.setIntValue(value)
+            writer.addDocument(document);
+            ...
+            }
+            
+ +

The java native types int, long, + float and double are + directly supported. However, any value that can be + converted into these native types can also be indexed. + For example, date/time values represented by a + {@link java.util.Date} can be translated into a long + value using the {@link java.util.Date#getTime} method. If you + don't need millisecond precision, you can quantize the + value, either by dividing the result of + {@link java.util.Date#getTime} or using the separate getters + (for year, month, etc.) to construct an int or + long value.

+ +

To perform range querying or filtering against a + NumericField, use {@link NumericRangeQuery} or {@link + NumericRangeFilter}. To sort according to a + NumericField, use the normal numeric sort types, eg + {@link SortField#INT} (note that {@link SortField#AUTO} + will not work with these fields). NumericField values + can also be loaded directly from {@link FieldCache}.

+ +

By default, a NumericField's value is not stored but + is indexed for range filtering and sorting. You can use + the {@link #NumericField(String,Field.Store,boolean)} + constructor if you need to change these defaults.

+ +

You may add the same field name as a NumericField to + the same document more than once. Range querying and + filtering will be the logical OR of all values; so a range query + will hit all documents that have at least one value in + the range. However sort behavior is not defined. If you need to sort, + you should separately index a single-valued NumericField.

+ +

A NumericField will consume somewhat more disk space + in the index than an ordinary single-valued field. + However, for a typical index that includes substantial + textual content per document, this increase will likely + be in the noise.

+ +

Within Lucene, each numeric value is indexed as a + trie structure, where each term is logically + assigned to larger and larger pre-defined brackets (which + are simply lower-precision representations of the value). + The step size between each successive bracket is called the + precisionStep, measured in bits. Smaller + precisionStep values result in larger number + of brackets, which consumes more disk space in the index + but may result in faster range search performance. The + default value, 4, was selected for a reasonable tradeoff + of disk space consumption versus performance. You can + use the expert constructor {@link + #NumericField(String,int,Field.Store,boolean)} if you'd + like to change the value. Note that you must also + specify a congruent value when creating {@link + NumericRangeQuery} or {@link NumericRangeFilter}. + For low cardinality fields larger precision steps are good. + If the cardinality is < 100, it is fair + to use {@link Integer#MAX_VALUE}, which produces one + term per value. + +

For more information on the internals of numeric trie + indexing, including the precisionStep + configuration, see {@link NumericRangeQuery}. The format of + indexed values is described in {@link NumericUtils}. + +

If you only need to sort by numeric value, and never + run range querying/filtering, you can index using a + precisionStep of {@link Integer#MAX_VALUE}. + This will minimize disk space consumed.

+ +

More advanced users can instead use {@link + NumericTokenStream} directly, when indexing numbers. This + class is a wrapper around this token stream type for + easier, more intuitive usage.

+ +

NOTE: This class is only used during + indexing. When retrieving the stored field value from a + {@link Document} instance after search, you will get a + conventional {@link Fieldable} instance where the numeric + values are returned as {@link String}s (according to + toString(value) of the used data type). + +

NOTE: This API is + experimental and might change in incompatible ways in the + next release. + +

+ 2.9 + +
+ + Creates a field for numeric values using the default precisionStep + {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + This constructor creates an indexed, but not stored field. + + the field name + + + + Creates a field for numeric values using the default precisionStep + {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + + the field name + + if the field should be stored in plain text form + (according to toString(value) of the used data type) + + if the field should be indexed using {@link NumericTokenStream} + + + + Creates a field for numeric values with the specified + precisionStep. The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + This constructor creates an indexed, but not stored field. + + the field name + + the used precision step + + + + Creates a field for numeric values with the specified + precisionStep. The instance is not yet initialized with + a numeric value, before indexing a document containing this field, + set a value using the various set???Value() methods. + + the field name + + the used precision step + + if the field should be stored in plain text form + (according to toString(value) of the used data type) + + if the field should be indexed using {@link NumericTokenStream} + + + + Returns a {@link NumericTokenStream} for indexing the numeric value. + + + Returns always null for numeric fields + + + Returns always null for numeric fields + + + Returns always null for numeric fields + + + Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). + + + Returns the current numeric value as a subclass of {@link Number}, null if not yet initialized. + + + Initializes the field with the supplied long value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).SetLongValue(value)) + + + + Initializes the field with the supplied int value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setIntValue(value)) + + + + Initializes the field with the supplied double value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setDoubleValue(value)) + + + + Initializes the field with the supplied float value. + the numeric value + + this instance, because of this you can use it the following way: + document.add(new NumericField(name, precisionStep).setFloatValue(value)) + + + + Transforms the token stream as per the Porter stemming algorithm. + Note: the input to the stemming filter must already be in lower case, + so you will need to use LowerCaseFilter or LowerCaseTokenizer farther + down the Tokenizer chain in order for this to work properly! +

+ To use this filter with other analyzers, you'll want to write an + Analyzer class that sets up the TokenStream chain as you want it. + To use this with LowerCaseTokenizer, for example, you'd write an + analyzer like this: +

+

+            class MyAnalyzer extends Analyzer {
+            public final TokenStream tokenStream(String fieldName, Reader reader) {
+            return new PorterStemFilter(new LowerCaseTokenizer(reader));
+            }
+            }
+            
+
+
+ + This analyzer is used to facilitate scenarios where different + fields require different analysis techniques. Use {@link #addAnalyzer} + to add a non-default analyzer on a field name basis. + +

Example usage: + +

+            PerFieldAnalyzerWrapper aWrapper =
+            new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+            aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+            aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+            
+ +

In this example, StandardAnalyzer will be used for all fields except "firstname" + and "lastname", for which KeywordAnalyzer will be used. + +

A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing + and query parsing. +

+
+ + Constructs with default analyzer. + + + Any fields not specifically + defined to use a different analyzer will use the one provided here. + + + + Constructs with default analyzer and a map of analyzers to use for + specific fields. + + + Any fields not specifically + defined to use a different analyzer will use the one provided here. + + a Map (String field name to the Analyzer) to be + used for those fields + + + + Defines an analyzer to use for the specified field. + + + field name requiring a non-default analyzer + + non-default analyzer to use for field + + + + Return the positionIncrementGap from the analyzer assigned to fieldName + + + Expert: This class provides a {@link TokenStream} + for indexing numeric values that can be used by {@link + NumericRangeQuery} or {@link NumericRangeFilter}. + +

Note that for simple usage, {@link NumericField} is + recommended. {@link NumericField} disables norms and + term freqs, as they are not usually needed during + searching. If you need to change these settings, you + should use this class. + +

See {@link NumericField} for capabilities of fields + indexed numerically.

+ +

Here's an example usage, for an int field: + +

+             Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+             field.setOmitNorms(true);
+             field.setOmitTermFreqAndPositions(true);
+             document.add(field);
+            
+ +

For optimal performance, re-use the TokenStream and Field instance + for more than one document: + +

+             NumericTokenStream stream = new NumericTokenStream(precisionStep);
+             Field field = new Field(name, stream);
+             field.setOmitNorms(true);
+             field.setOmitTermFreqAndPositions(true);
+             Document document = new Document();
+             document.add(field);
+            
+             for(all documents) {
+               stream.setIntValue(value)
+               writer.addDocument(document);
+             }
+            
+ +

This stream is not intended to be used in analyzers; + it's more for iterating the different precisions during + indexing a specific numeric value.

+ +

NOTE: as token streams are only consumed once + the document is added to the index, if you index more + than one numeric field, use a separate NumericTokenStream + instance for each.

+ +

See {@link NumericRangeQuery} for more details on the + precisionStep + parameter as well as how numeric fields work under the hood.

+ +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + +

+ 2.9 + +
+ + The full precision token gets this token type assigned. + + + The lower precision tokens gets this token type assigned. + + + Creates a token stream for numeric values using the default precisionStep + {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Creates a token stream for numeric values with the specified + precisionStep. The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Expert: Creates a token stream for numeric values with the specified + precisionStep using the given {@link AttributeSource}. + The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Expert: Creates a token stream for numeric values with the specified + precisionStep using the given + {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. + The stream is not yet initialized, + before using set a value using the various set???Value() methods. + + + + Initializes the token stream with the supplied long value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value)) + + + + Initializes the token stream with the supplied int value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value)) + + + + Initializes the token stream with the supplied double value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value)) + + + + Initializes the token stream with the supplied float value. + the value, for which this TokenStream should enumerate tokens. + + this instance, because of this you can use it the following way: + new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value)) + + + + Normalizes token text to lower case. + + + $Id: LowerCaseFilter.java 797665 2009-07-24 21:45:48Z buschmi $ + + + + Replacement for Java 1.5 Character.valueOf() + Move to Character.valueOf() in 3.0 + + + + Returns a Character instance representing the given char value + + + a char value + + a Character representation of the given char value. + + + + This is a helper class to generate prefix-encoded representations for numerical values + and supplies converters to represent float/double values as sortable integers/longs. + +

To quickly execute range queries in Apache Lucene, a range is divided recursively + into multiple intervals for searching: The center of the range is searched only with + the lowest possible precision in the trie, while the boundaries are matched + more exactly. This reduces the number of terms dramatically. + +

This class generates terms to achieve this: first, the numerical integer values need to be converted to strings. For that, integer values (32 bit or 64 bit) are made unsigned and the bits are encoded into ASCII chars, 7 bits per char. The resulting string is sortable like the original integer value. Each value is also prefixed (in the first char) by the shift value (the number of bits removed) used during encoding. + +

To also index floating point numbers, this class supplies two methods to convert them to integer values by changing their bit layout: {@link #doubleToSortableLong}, {@link #floatToSortableInt}. You will have no precision loss by converting floating point numbers to integers and back (only the integer form itself is not directly usable). Other data types like dates can easily be converted to longs or ints (e.g. date to long: {@link java.util.Date#getTime}). + +

For easy usage, the trie algorithm is implemented for indexing inside + {@link NumericTokenStream} that can index int, long, + float, and double. For querying, + {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query part + for the same data types. + +

This class can also be used to generate lexicographically sortable (according to {@link String#compareTo(String)}) representations of numeric data types for other usages (e.g. sorting). + +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + +

+ 2.9 + +
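+ A short round-trip sketch using the conversion methods listed below; the value is arbitrary:
+            double value = 10.5;
+            long sortable = NumericUtils.doubleToSortableLong(value);      // sortable as a signed long, no precision loss
+            String encoded = NumericUtils.longToPrefixCoded(sortable);     // full-precision, lexicographically sortable term text
+            long decodedSortable = NumericUtils.prefixCodedToLong(encoded);
+            double decoded = NumericUtils.sortableLongToDouble(decodedSortable);   // == 10.5 again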
+ + The default precision step used by {@link NumericField}, {@link NumericTokenStream}, + {@link NumericRangeQuery}, and {@link NumericRangeFilter} as default + + + + Expert: The maximum term length (used for char[] buffer size) + for encoding long values. + + + + + + Expert: The maximum term length (used for char[] buffer size) + for encoding int values. + + + + + + Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is + stored as SHIFT_START_LONG+shift in the first character + + + + Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is + stored as SHIFT_START_INT+shift in the first character + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by {@link NumericTokenStream}. + + the numeric value + + how many bits to strip from the right + + that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG} + length + + number of chars written to buffer + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by {@link LongRangeBuilder}. + + the numeric value + + how many bits to strip from the right + + + + This is a convenience method, that returns prefix coded bits of a long without + reducing the precision. It can be used to store the full precision value as a + stored field in index. +

To decode, use {@link #prefixCodedToLong}. +

+
+ + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by {@link NumericTokenStream}. + + the numeric value + + how many bits to strip from the right + + that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT} + length + + number of chars written to buffer + + + + Expert: Returns prefix coded bits after reducing the precision by shift bits. + This is method is used by {@link IntRangeBuilder}. + + the numeric value + + how many bits to strip from the right + + + + This is a convenience method, that returns prefix coded bits of an int without + reducing the precision. It can be used to store the full precision value as a + stored field in index. +

To decode, use {@link #prefixCodedToInt}. +

+
+ + Returns a long from prefixCoded characters. + Rightmost bits will be zero for lower precision codes. + This method can be used to decode e.g. a stored field. + + NumberFormatException if the supplied string is + not correctly prefix encoded. + + + + + + Returns an int from prefixCoded characters. + Rightmost bits will be zero for lower precision codes. + This method can be used to decode e.g. a stored field. + + NumberFormatException if the supplied string is + not correctly prefix encoded. + + + + + + Converts a double value to a sortable signed long. + The value is converted by getting their IEEE 754 floating-point "double format" + bit layout and then some bits are swapped, to be able to compare the result as long. + By this the precision is not reduced, but the value can easily used as a long. + + + + + + Convenience method: this just returns: + longToPrefixCoded(doubleToSortableLong(val)) + + + + Converts a sortable long back to a double. + + + + + Convenience method: this just returns: + sortableLongToDouble(prefixCodedToLong(val)) + + + + Converts a float value to a sortable signed int. + The value is converted by getting their IEEE 754 floating-point "float format" + bit layout and then some bits are swapped, to be able to compare the result as int. + By this the precision is not reduced, but the value can easily used as an int. + + + + + + Convenience method: this just returns: + intToPrefixCoded(floatToSortableInt(val)) + + + + Converts a sortable int back to a float. + + + + + Convenience method: this just returns: + sortableIntToFloat(prefixCodedToInt(val)) + + + + Expert: Splits a long range recursively. + You may implement a builder that adds clauses to a + {@link Lucene.Net.Search.BooleanQuery} for each call to its + {@link LongRangeBuilder#AddRange(String,String)} + method. +

This method is used by {@link NumericRangeQuery}. +

+
+ + Expert: Splits an int range recursively. + You may implement a builder that adds clauses to a + {@link Lucene.Net.Search.BooleanQuery} for each call to its + {@link IntRangeBuilder#AddRange(String,String)} + method. +

This method is used by {@link NumericRangeQuery}. +

+
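A hedged sketch of the builder pattern just described (Java names; the field "price", the precision step of 4, the bounds, and the use of SHOULD clauses are illustrative assumptions):
+            final BooleanQuery bq = new BooleanQuery(true); // coord disabled; all clauses are SHOULD
+            NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() {
+              public void addRange(String minPrefixCoded, String maxPrefixCoded) {
+                // one inclusive term range per sub-range produced by the split
+                bq.add(new TermRangeQuery("price", minPrefixCoded, maxPrefixCoded, true, true),
+                       BooleanClause.Occur.SHOULD);
+              }
+            }, 4, 100L, 200L);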
+ + This helper does the splitting for both 32-bit and 64-bit values. + + + Helper that delegates to the correct range builder. + + + Expert: Callback for {@link #splitLongRange}. + You need to override only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + Override this method if you want to receive the already prefix-encoded range bounds. + You can directly build classical (inclusive) range queries from them. + + + Override this method if you want to receive the raw long range bounds. + You can use this, e.g., for debugging purposes (printing out range bounds). + + + Expert: Callback for {@link #splitIntRange}. + You need to override only one of the methods. +

NOTE: This is a very low-level interface, + the method signatures may change in later versions. +

+
+ + Overwrite this method, if you like to receive the already prefix encoded range bounds. + You can directly build classical range (inclusive) queries from them. + + + + Overwrite this method, if you like to receive the raw int range bounds. + You can use this for e.g. debugging purposes (print out range bounds). + + + + An average, best guess, MemoryModel that should work okay on most systems. + + + + + Returns primitive memory sizes for estimating RAM usage. + + + + + size of array beyond contents + + + + Class size overhead + + + + a primitive Class - bool, byte, char, short, long, float, + short, double, int + + the size in bytes of given primitive Class + + + + size of reference + + + + A memory-resident {@link IndexOutput} implementation. + + + $Id: RAMOutputStream.java 691694 2008-09-03 17:34:29Z mikemccand $ + + + + Construct an empty output buffer. + + + Copy the current contents of this buffer to the named output. + + + Resets this to an empty buffer. + + + Returns byte usage of all buffers. + + + A Query that matches documents containing a term. + This may be combined with other terms with a {@link BooleanQuery}. + + + + Constructs a query for the term t. + + + Returns the term of this query. + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + A Filter that restricts search results to a range of values in a given + field. + +

This filter matches documents by looking for terms that fall into the + supplied range according to {@link String#compareTo(String)}. It is not intended + for numerical ranges; use {@link NumericRangeFilter} instead. + +

If you construct a large number of range filters with different ranges but on the + same field, {@link FieldCacheRangeFilter} may have significantly better performance. + +

+ Use {@link TermRangeFilter} for term ranges or + {@link NumericRangeFilter} for numeric ranges instead. + This class will be removed in Lucene 3.0. + +
+ + The field this range applies to + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + + + The lower bound on this range + + The upper bound on this range + + Does this range include the lower bound? + + Does this range include the upper bound? + + The collator to use when determining range inclusion; set + to null to use Unicode code point ordering instead of collation. + + IllegalArgumentException if both terms are null or if + lowerTerm is null and includeLower is true (similar for upperTerm + and includeUpper) + + + + Constructs a filter for field fieldName matching + less than or equal to upperTerm. + + + + Constructs a filter for field fieldName matching + greater than or equal to lowerTerm. + + + + Calculates the minimum payload seen + + + + + +

A {@link Query} that matches numeric values within a + specified range. To use this, you must first index the + numeric values using {@link NumericField} (expert: {@link + NumericTokenStream}). If your terms are instead textual, + you should use {@link TermRangeQuery}. {@link + NumericRangeFilter} is the filter equivalent of this + query.

+ +
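For example, a minimal indexing sketch (the field name, the value, and an already-open IndexWriter named writer are assumptions):
+            Document doc = new Document();
+            // index "weight" as a trie-encoded float using the default precisionStep (4)
+            doc.add(new NumericField("weight").setFloatValue(3.5f));
+            writer.addDocument(doc);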

You create a new NumericRangeQuery with the static + factory methods, e.g.: + +

+            Query q = NumericRangeQuery.newFloatRange("weight",
+            new Float(0.10f), new Float(0.3f),
+            true, true);
+            
+ + matches all documents whose float-valued "weight" field + ranges from 0.10 to 0.3, inclusive. + +
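Half-open ranges are created the same way by passing null for a bound; a hedged sketch (field name and value are illustrative):
+            Query newerThan = NumericRangeQuery.newLongRange("timestamp",
+                new Long(1000000000L), null,   // null upper bound: no upper limit
+                true, false);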

The performance of NumericRangeQuery is much better + than the corresponding {@link TermRangeQuery} because the + number of terms that must be searched is usually far + fewer, thanks to trie indexing, described below.

+ +

You can optionally specify a precisionStep + when creating this query. This is necessary if you've + changed this configuration from its default (4) during + indexing. Lower values consume more disk space but speed + up searching. Suitable values are between 1 and + 8. A good starting point to test is 4, + which is the default value for all Numeric* + classes. See below for + details. + +

This query defaults to {@linkplain + MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} for + 32 bit (int/float) ranges with precisionStep <8 and 64 + bit (long/double) ranges with precisionStep <6. + Otherwise it uses {@linkplain + MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the + number of terms is likely to be high. With precision + steps of <4, this query can be run with one of the + BooleanQuery rewrite methods without changing + BooleanQuery's default max clause count. + +

NOTE: This API is experimental and + might change in incompatible ways in the next release. + +

How it works

+ +

See the publication about panFMP, + where this algorithm was described (referred to as TrieRangeQuery): + +

Schindler, U, Diepenbroek, M, 2008. + Generic XML-based Framework for Metadata Portals. + Computers & Geosciences 34 (12), 1947-1955. + doi:10.1016/j.cageo.2008.02.023
+ +

A quote from this paper: Because Apache Lucene is a full-text + search engine and not a conventional database, it cannot handle numerical ranges + (e.g., field value is inside user defined bounds, even dates are numerical values). + We have developed an extension to Apache Lucene that stores + the numerical values in a special string-encoded format with variable precision + (all numerical values like doubles, longs, floats, and ints are converted to + lexicographic sortable string representations and stored with different precisions + (for a more detailed description of how the values are stored, + see {@link NumericUtils}). A range is then divided recursively into multiple intervals for searching: + The center of the range is searched only with the lowest possible precision in the trie, + while the boundaries are matched more exactly. This reduces the number of terms dramatically.

+ +

For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that + uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the + lowest precision. Overall, a range could consist of a theoretical maximum of + 7*255*2 + 255 = 3825 distinct terms (when there is a term for every distinct value of an + 8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used + because it would always be possible to reduce the full 256 values to one term with degraded precision). + In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records + and a uniform value distribution).

+ +

Precision Step

+

You can choose any precisionStep when encoding values. + Lower step values mean more precisions and therefore more terms in the index (and a larger index). + On the other hand, the maximum number of terms to match decreases, which speeds up queries. + The formula to calculate the maximum term count is: +

+            n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 )
+            
+

(This formula is only correct when bitsPerValue/precisionStep is an integer; + in other cases the value must be rounded up, and the last summand must use the remainder of the division as its + precision step.) + For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision + step of 2, n = 31*3*2 + 3 = 189. The gain in search speed is, however, offset by more seeking + in the term enum of the index. Because of this, the ideal precisionStep value can only + be found by testing. Important: You can index with a lower precision step value and test search speed + using a multiple of the original step value.

+ +
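To make the formula concrete, here is a small helper sketch (not part of Lucene) that computes n when bitsPerValue is a multiple of precisionStep:
+            static long maxTermCount(int bitsPerValue, int precisionStep) {
+              long perStep = (1L << precisionStep) - 1;   // 2^precisionStep - 1
+              int levels = bitsPerValue / precisionStep;  // number of precision levels
+              return (levels - 1) * perStep * 2 + perStep;
+            }
+            // maxTermCount(64, 4) == 465, maxTermCount(64, 2) == 189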

Good values for precisionStep depend on usage and data type: +

    +
  • The default for all data types is 4, which is used when no precisionStep is given.
  • Ideal value in most cases for 64 bit data types (long, double) is 6 or 8.
  • Ideal value in most cases for 32 bit data types (int, float) is 4.
  • Steps >64 for long/double and >32 for int/float produce one token + per value in the index, and querying is then as slow as a conventional {@link TermRangeQuery}. But this can be used + to produce fields that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as the + precisionStep). Using {@link NumericField NumericFields} for sorting + is ideal, because building the field cache is much faster than with text-only numbers. + Sorting is also possible with range-query-optimized fields using one of the above precisionSteps.
+ +

Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed + that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count) + took about 30-40 secs to complete, {@link TermRangeQuery} in constant score filter rewrite mode took 5 secs + and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit + precision step). This query type was developed for a geographic portal, where the performance for + e.g. bounding boxes or exact date/time stamps is important.

+ +

+ 2.9 + + +
+ + Factory that creates a NumericRangeQuery, that queries a long + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a long + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a int + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a int + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a double + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a double + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a float + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeQuery, that queries a float + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Returns the field name for this query + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the lower value of this range query + + + Returns the upper value of this range query + + + + Lucene.Net specific. Needed for Serialization + + + + + + + Lucene.Net specific. Needed for deserialization + + + + + + Subclass of FilteredTermEnum for enumerating all terms that match the + sub-ranges for trie range queries. +

+ WARNING: This term enumeration is not guaranteed to always be ordered by + {@link Term#compareTo}. + The ordering depends on how {@link NumericUtils#splitLongRange} and + {@link NumericUtils#splitIntRange} generate the sub-ranges. For + {@link MultiTermQuery} the ordering is not relevant.

+
+ + this is a dummy, it is not used by this class. + + + Compares if current upper bound is reached, + this also updates the term count for statistics. + In contrast to {@link FilteredTermEnum}, a return value + of false ends iterating the current enum + and forwards to the next sub-range. + + + + Increments the enumeration to the next element. True if one exists. + + + Closes the enumeration to further activity, freeing resources. + + + Expert: A ScoreDoc which also contains information about + how to sort the referenced document. In addition to the + document number and score, this object contains an array + of values for the document from the field(s) used to sort. + For example, if the sort criteria was to sort by fields + "a", "b" then "c", the fields object array + will have three elements, corresponding respectively to + the term values for the document in fields "a", "b" and "c". + The class of each element in the array will be either + Integer, Float or String depending on the type of values + in the terms of each field. + +

Created: Feb 11, 2004 1:23:38 PM + +

+ lucene 1.4 + + $Id: FieldDoc.java 773194 2009-05-09 10:36:41Z mikemccand $ + + + + + +
+ + Expert: Returned by low-level search implementations. + + + + + Expert: The score of this document for the query. + + + Expert: A hit document's number. + + + + + Expert: Constructs a ScoreDoc. + + + Expert: The values which are used to sort the referenced document. + The order of these will match the original sort criteria given by a + Sort object. Each Object will be either an Integer, Float or String, + depending on the type of values in the terms of the original field. + + + + + + + + Expert: Creates one of these objects with empty sort information. + + + Expert: Creates one of these objects with the given sort information. + + + Store a sorted collection of {@link Lucene.Net.Index.TermVectorEntry}s. Collects all term information + into a single, SortedSet. +
+ NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not + know what Fields they correlate with. +
+ This is not thread-safe +
+
+ + Stand-in name for the field in {@link TermVectorEntry}. + + + + A Comparator for sorting {@link TermVectorEntry}s + + + + + The term to map + + The frequency of the term + + Offset information, may be null + + Position information, may be null + + + + The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed into the constructor. +
+ This set will be empty until after the mapping process takes place. + +
+ The SortedSet of {@link TermVectorEntry}. + +
+ + This is a DocConsumer that gathers all fields under the + same name, and calls per-field consumers to process field + by field. This class doesn't do any "real" work + of its own: it just forwards the fields to a + DocFieldConsumer. + + + Implements the skip list writer for the default posting list format + that stores positions and payloads. + + + + Sets the values for the current skip data. + + + Implements the skip list reader for the default posting list format + that stores positions and payloads. + + + + Returns the freq pointer of the doc to which the last call of + {@link MultiLevelSkipListReader#SkipTo(int)} has skipped. + + + + Returns the prox pointer of the doc to which the last call of + {@link MultiLevelSkipListReader#SkipTo(int)} has skipped. + + + + Returns the payload length of the payload stored just before + the doc to which the last call of {@link MultiLevelSkipListReader#SkipTo(int)} + has skipped. + + + + This exception is thrown when Lucene detects + an inconsistency in the index. + + + + Basic tool and API to check the health of an index and + write a new segments file that removes references to + problematic segments. + +

As this tool checks every byte in the index, on a large + index it can take quite a long time to run. + +

WARNING: this tool and API is new and + experimental and is subject to sudden change in the + next release. Please make a complete backup of your + index before using this to fix your index!

+
+ + Default PrintStream for all CheckIndex instances. + Use {@link #setInfoStream} per instance, + instead. + + + + Create a new CheckIndex on the directory. + + + Set infoStream where messages should go. If null, no + messages are printed + + + + Returns true if index is clean, else false. + Please instantiate a CheckIndex and then use {@link #CheckIndex()} instead + + + + Returns true if index is clean, else false. + Please instantiate a CheckIndex and then use {@link #CheckIndex(List)} instead + + + + Returns a {@link Status} instance detailing + the state of the index. + +

As this method checks every byte in the index, on a large + index it can take quite a long time to run. + +

WARNING: make sure + you only call this when the index is not opened by any + writer. +

+
+ + Returns a {@link Status} instance detailing + the state of the index. + + + list of specific segment names to check + +

As this method checks every byte in the specified + segments, on a large index it can take quite a long + time to run. + +

WARNING: make sure + you only call this when the index is not opened by any + writer. + + + +

Test field norms. +
+ + Test the term index. + + + Test stored fields for a segment. + + + Test term vectors for a segment. + + + Repairs the index using previously returned result + from {@link #checkIndex}. Note that this does not + remove any of the unreferenced files after it's done; + you must separately open an {@link IndexWriter}, which + deletes unreferenced files when it's created. + +

WARNING: this writes a + new segments file into the index, effectively removing + all documents in broken segments from the index. + BE CAREFUL. + +

WARNING: Make sure you only call this when the + index is not opened by any writer. +

+
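A hedged sketch of this check-then-repair flow (Java Lucene names as referenced above; dir is assumed to be an already-opened Directory):
+            CheckIndex checker = new CheckIndex(dir);
+            CheckIndex.Status status = checker.checkIndex();
+            if (!status.clean) {
+              // drops all documents in broken segments -- back up the index first!
+              checker.fixIndex(status);
+            }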
+ + Command-line interface to check and fix an index. +

+ Run it like this: +

+            java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+            
+
    +
  • -fix: actually write a new segments_N file, removing any problematic segments
  • -segment X: only check the specified + segment(s). This can be specified multiple times + to check more than one segment, e.g. -segment _2 + -segment _a. You can't use this with the -fix + option.
+

WARNING: -fix should only be used on an emergency basis as it will cause + documents (perhaps many) to be permanently removed from the index. Always make + a backup copy of your index before running this! Do not run this tool on an index + that is actively being written to. You have been warned! +

Run without -fix, this tool will open the index, report version information + and report any exceptions it hits and what action it would take if -fix were + specified. With -fix, this tool will remove any segments that have issues and + write a new segments_N file. This means all documents contained in the affected + segments will be removed. +

+ This tool exits with exit code 1 if the index cannot be opened or has any + corruption, else 0. +

+
+ + Returned from {@link #CheckIndex()} detailing the health and status of the index. + +

WARNING: this API is new and experimental and is + subject to sudden change in the next release. + +

+
+ + True if no problems were found with the index. + + + True if we were unable to locate and load the segments_N file. + + + True if we were unable to open the segments_N file. + + + True if we were unable to read the version number from segments_N file. + + + Name of latest segments_N file in the index. + + + Number of segments in the index. + + + String description of the version of the index. + + + Empty unless you passed specific segments list to check as optional 3rd argument. + + + + + True if the index was created with a newer version of Lucene than the CheckIndex tool. + + + List of {@link SegmentInfoStatus} instances, detailing status of each segment. + + + Directory index is in. + + + SegmentInfos instance containing only segments that + had no problems (this is used with the {@link CheckIndex#fixIndex} + method to repair the index. + + + + How many documents will be lost to bad segments. + + + How many bad segments were found. + + + True if we checked only specific segments ({@link + #CheckIndex(List)}) was called with non-null + argument). + + + + Holds the userData of the last commit in the index + + + Holds the status of each segment in the index. + See {@link #segmentInfos}. + +

WARNING: this API is new and experimental and is + subject to sudden change in the next release.

+
+ + Name of the segment. + + + Document count (does not take deletions into account). + + + True if segment is compound file format. + + + Number of files referenced by this segment. + + + Net size (MB) of the files referenced by this + segment. + + + + Doc store offset, if this segment shares the doc + store files (stored fields and term vectors) with + other segments. This is -1 if it does not share. + + + + String of the shared doc store segment, or null if + this segment does not share the doc store files. + + + + True if the shared doc store files are compound file + format. + + + + True if this segment has pending deletions. + + + Name of the current deletions file name. + + + Number of deleted documents. + + + True if we were able to open a SegmentReader on this + segment. + + + + Number of fields in this segment. + + + True if at least one of the fields in this segment + does not omitTermFreqAndPositions. + + + + + + Map<String, String> that includes certain + debugging details that IndexWriter records into + each segment it creates + + + + Status for testing of field norms (null if field norms could not be tested). + + + Status for testing of indexed terms (null if indexed terms could not be tested). + + + Status for testing of stored fields (null if stored fields could not be tested). + + + Status for testing of term vectors (null if term vectors could not be tested). + + + Status from testing field norms. + + + Number of fields successfully tested + + + Exception thrown during term index test (null on success) + + + Status from testing term index. + + + Total term count + + + Total frequency across all terms. + + + Total number of positions. + + + Exception thrown during term index test (null on success) + + + Status from testing stored fields. + + + Number of documents tested. + + + Total number of stored fields tested. + + + Exception thrown during stored fields test (null on success) + + + Status from testing stored fields. + + + Number of documents tested. + + + Total number of term vectors tested. + + + Exception thrown during term vector test (null on success) + + + Borrowed from Cglib. Allows custom swap so that two arrays can be sorted + at the same time. + + + + Simple lockless and memory barrier free String intern cache that is guaranteed + to return the same String instance as String.intern() does. + + + + Size of the hash table, should be a power of two. + + Maximum length of each bucket, after which the oldest item inserted is dropped. + + + + Common util methods for dealing with {@link IndexReader}s. + + + + + Gathers sub-readers from reader into a List. + + + + + + + + + Returns sub IndexReader that contains the given document id. + + + id of document + + parent reader + + sub reader of parent which contains the specified doc id + + + + Returns sub-reader subIndex from reader. + + + parent reader + + index of desired sub reader + + the subreader at subINdex + + + + Returns index of the searcher/reader for document n in the + array used to construct this searcher/reader. + + + + Optimized implementation of a vector of bits. This is more-or-less like + java.util.BitSet, but also includes the following: +
    +
  • a count() method, which efficiently computes the number of one bits;
  • optimized read from and write to disk;
  • inlinable get() method;
  • store and load, as bit set or d-gaps, depending on sparseness;
+
+ $Id: BitVector.java 765649 2009-04-16 14:29:26Z mikemccand $ + +
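A minimal sketch of the basic operations (the size and bit index are illustrative):
+            BitVector bits = new BitVector(1000); // capacity of 1000 bits, all initially zero
+            bits.set(42);
+            boolean isSet = bits.get(42);         // true
+            int ones = bits.count();              // 1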
+ + Constructs a vector capable of holding n bits. + + + Sets the value of bit to one. + + + Sets the value of bit to true, and + returns true if bit was already set + + + + Sets the value of bit to zero. + + + Returns true if bit is one and + false if it is zero. + + + + Returns the number of bits in this vector. This is also one greater than + the number of the largest valid bit number. + + + + Returns the total number of one bits in this vector. This is efficiently + computed and cached, so that, if the vector is not changed, no + recomputation is done for repeated calls. + + + + Writes this vector to the file name in Directory + d, in a format that can be read by the constructor {@link + #BitVector(Directory, String)}. + + + + Write as a bit set + + + Write as a d-gaps list + + + Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. + + + Constructs a bit vector from the file name in Directory + d, as written by the {@link #write} method. + + + + Read as a bit set + + + read as a d-gaps list + + + Retrieve a subset of this BitVector. + + + starting index, inclusive + + ending index, exclusive + + subset + + + + A {@link LockFactory} that wraps another {@link + LockFactory} and verifies that each lock obtain/release + is "correct" (never results in two processes holding the + lock at the same time). It does this by contacting an + external server ({@link LockVerifyServer}) to assert that + at most one process holds the lock at a time. To use + this, you should also run {@link LockVerifyServer} on the + host & port matching what you pass to the constructor. + + + + + + + + + should be a unique id across all clients + + the LockFactory that we are testing + + host or IP where {@link LockVerifyServer} + is running + + the port {@link LockVerifyServer} is + listening on + + + + Matches spans which are near one another. One can specify slop, the + maximum number of intervening unmatched positions, as well as whether + matches are required to be in-order. + + + + Construct a SpanNearQuery. Matches spans matching a span from each + clause, with up to slop total unmatched positions between + them. * When inOrder is true, the spans from each clause + must be * ordered as in clauses. + + + + Return the clauses whose spans are matched. + + + Return the maximum number of intervening unmatched positions permitted. + + + Return true if matches are required to be in-order. + + + Returns a collection of all terms matched by this query. + use extractTerms instead + + + + + + Returns true iff o is equal to this. + + + Filter caching singleton. It can be used + to save filters locally for reuse. + This class makes it possble to cache Filters even when using RMI, as it + keeps the cache on the seaercher side of the RMI connection. + + Also could be used as a persistent storage for any filter as long as the + filter provides a proper hashCode(), as that is used as the key in the cache. + + The cache is periodically cleaned up from a separate thread to ensure the + cache doesn't exceed the maximum size. + + + + The default maximum number of Filters in the cache + + + The default frequency of cache clenup + + + The cache itself + + + Maximum allowed cache size + + + Cache cleaning frequency + + + Cache cleaner that runs in a separate thread + + + Sets up the FilterManager singleton. + + + Sets the max size that cache should reach before it is cleaned up + maximum allowed cache size + + + + Sets the cache cleaning frequency in milliseconds. 
+ cleaning frequency in millioseconds + + + + Returns the cached version of the filter. Allows the caller to pass up + a small filter but this will keep a persistent version around and allow + the caching filter to do its job. + + + The input filter + + The cached version of the filter + + + + Holds the filter and the last time the filter was used, to make LRU-based + cache cleaning possible. + TODO: Clean this up when we switch to Java 1.5 + + + + Keeps the cache from getting too big. + If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread + to clean out the cache. + + The SortedSet sortedFilterItems is used only to sort the items from the cache, + so when it's time to clean up we have the TreeSet sort the FilterItems by + timestamp. + + Removes 1.5 * the numbers of items to make the cache smaller. + For example: + If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 round up to 8. + This way we clean the cache a bit more, and avoid having the cache cleaner having to do it frequently. + + + + Extends TermFreqVector to provide additional information about + positions in which each of the terms is found. A TermPositionVector not necessarily + contains both positions and offsets, but at least one of these arrays exists. + + + + Returns an array of positions in which the term is found. + Terms are identified by the index at which its number appears in the + term String array obtained from the indexOf method. + May return null if positions have not been stored. + + + + Returns an array of TermVectorOffsetInfo in which the term is found. + May return null if offsets have not been stored. + + + + + + The position in the array to get the offsets from + + An array of TermVectorOffsetInfo objects or the empty list + + + + This is a {@link LogMergePolicy} that measures size of a + segment as the total byte size of the segment's files. + + + + + + + + Default maximum segment size. A segment of this size + + + + +

Determines the largest segment (measured by total + byte size of the segment's files, in MB) that may be + merged with other segments. Small values (e.g., less + than 50 MB) are best for interactive indexing, as this + limits the length of pauses while indexing to a few + seconds. Larger values are best for batched indexing + and speedier searches.

+ +

Note that {@link #setMaxMergeDocs} is also + used to check whether a segment is too large for + merging (it's either or).

+

+
+ + Returns the largest segment (measured by total byte + size of the segment's files, in MB) that may be merged + with other segments. + + + + + + Sets the minimum size for the lowest level segments. + Any segments below this size are considered to be on + the same level (even if they vary drastically in size) + and will be merged whenever there are mergeFactor of + them. This effectively truncates the "long tail" of + small segments that would otherwise be created into a + single level. If you set this too large, it could + greatly increase the merging cost during indexing (if + you flush many small segments). + + + + Get the minimum size for a segment to remain + un-merged. + + + + + + This attribute can be used to pass different flags down the tokenizer chain, + e.g. from one TokenFilter to another one. + + + + EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. +

+ + Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#Type()}, although they do share similar purposes. + The flags can be used to encode information about the token for use by other {@link Lucene.Net.Analysis.TokenFilter}s. + + +

+ The bits + +
+ + + + + + This class can be used if the token attributes of a TokenStream + are intended to be consumed more than once. It caches + all token attribute states locally in a List. + +

CachingTokenFilter implements the optional method + {@link TokenStream#Reset()}, which repositions the + stream to the first Token. +

+
+ + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Will be removed in Lucene 3.0. This method is final, as it should + not be overridden. Delegates to the backwards compatibility layer. + + + + Methods for manipulating strings. + + $Id: StringHelper.java 801344 2009-08-05 18:05:06Z yonik $ + + + + Expert: + The StringInterner implementation used by Lucene. + This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used. + + + + Return the same string object for all equal strings + + + Compares two byte[] arrays, element by element, and returns the + number of elements common to both arrays. + + + The first byte[] to compare + + The second byte[] to compare + + The number of common elements. + + + + Compares two strings, character by character, and returns the + first position where the two strings differ from one another. + + + The first string to compare + + The second string to compare + + The first position where the two strings differ. + + + + Some useful constants. + + + + $Id: Constants.java 828327 2009-10-22 06:47:40Z uschindler $ + + + + + The value of System.getProperty("java.version"). * + + + True iff this is Java version 1.1. + + + True iff this is Java version 1.2. + + + True iff this is Java version 1.3. + + + The value of System.getProperty("os.name"). * + + + True iff running on Linux. + + + True iff running on Windows. + + + True iff running on SunOS. + + + This exception is thrown when the write.lock + could not be released. + + + + + + + Represents hits returned by {@link Searcher#search(Query,Filter,int,Sort)}. + + + + The fields which were used to sort results by. + + + Creates one of these objects. + Total number of hits for the query. + + The top hits for the query. + + The sort criteria used to find the top hits. + + The maximum score encountered. + + + + A Query that matches documents within an exclusive range of terms. + +

This query matches documents by looking for terms that fall into the + supplied range according to {@link Term#CompareTo(Term)}. It is not intended + for numerical ranges; use {@link NumericRangeQuery} instead. + +

This query uses {@linkplain + MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}. If you + want to change this, use the new {@link TermRangeQuery} + instead. + +

+ Use {@link TermRangeQuery} for term ranges or + {@link NumericRangeQuery} for numeric ranges instead. + This class will be removed in Lucene 3.0. + +
+ + Constructs a query selecting all terms greater than + lowerTerm but less than upperTerm. + There must be at least one term and either term may be null, + in which case there is no bound on that side, but if there are + two terms, both terms must be for the same field. + + + The Term at the lower end of the range + + The Term at the upper end of the range + + If true, both lowerTerm and + upperTerm will themselves be included in the range. + + + + Constructs a query selecting all terms greater than + lowerTerm but less than upperTerm. + There must be at least one term and either term may be null, + in which case there is no bound on that side, but if there are + two terms, both terms must be for the same field. +

+ If collator is not null, it will be used to decide whether + index terms are within the given range, rather than using the Unicode code + point order in which index terms are stored. +

+ WARNING: Using this constructor and supplying a non-null + value in the collator parameter will cause every single + index Term in the Field referenced by lowerTerm and/or upperTerm to be + examined. Depending on the number of index Terms in this Field, the + operation could be very slow. + +

+ The Term at the lower end of the range + + The Term at the upper end of the range + + If true, both lowerTerm and + upperTerm will themselves be included in the range. + + The collator to use to collate index Terms, to determine + their membership in the range bounded by lowerTerm and + upperTerm. + +
+ + Returns the field name for this query + + + Returns the lower term of this range query. + + + Returns the upper term of this range query. + + + Returns true if the range query is inclusive + + + Returns the collator used to determine range inclusion, if any. + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + A Query that matches documents containing a particular sequence of terms. + A PhraseQuery is built by QueryParser for input like "new york". + +

This query may be combined with other terms or queries with a {@link BooleanQuery}. +

+
+ + Constructs an empty phrase query. + + + Sets the number of other words permitted between words in query phrase. + If zero, then this is an exact phrase search. For larger values this works + like a WITHIN or NEAR operator. +

The slop is in fact an edit-distance, where the units correspond to + moves of terms in the query phrase out of position. For example, to switch + the order of two words requires two moves (the first move places the words + atop one another), so to permit re-orderings of phrases, the slop must be + at least two. +

More exact matches are scored higher than sloppier matches, thus search + results are sorted by exactness. +

The slop is zero by default, requiring exact matches. +

+
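A hedged sketch (the field name is an assumption) of the phrase "new york" with a slop of 2, which also admits the reordered "york new":
+            PhraseQuery pq = new PhraseQuery();
+            pq.add(new Term("body", "new"));
+            pq.add(new Term("body", "york"));
+            pq.setSlop(2); // 0 (the default) would require an exact phrase match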
+ + Returns the slop. See setSlop(). + + + Adds a term to the end of the query phrase. + The relative position of the term is the one immediately after the last term added. + + + + Adds a term to the end of the query phrase. + The relative position of the term within the phrase is specified explicitly. + This allows e.g. phrases with more than one term at the same position + or phrases with gaps (e.g. in connection with stopwords). + + + + + + + + + Returns the set of terms in this phrase. + + + Returns the relative positions of terms in this phrase. + + + + + + + Prints a user-readable version of this query. + + + Returns true iff o is equal to this. + + + Returns a hash code value for this object. + + + Expert: a FieldComparator compares hits so as to determine their + sort order when collecting the top results with {@link + TopFieldCollector}. The concrete public FieldComparator + classes here correspond to the SortField types. + +

This API is designed to achieve high performance + sorting, by exposing a tight interaction with {@link + FieldValueHitQueue} as it visits hits. Whenever a hit is + competitive, it's enrolled into a virtual slot, which is + an int ranging from 0 to numHits-1. The {@link + FieldComparator} is made aware of segment transitions + during searching in case any internal state it's tracking + needs to be recomputed during these transitions.

+ +

A comparator must define these functions:

+ +

    + +
  • {@link #compare} Compare a hit at 'slot a' + with hit 'slot b'.
  • {@link #setBottom} This method is called by + {@link FieldValueHitQueue} to notify the + FieldComparator of the current weakest ("bottom") + slot. Note that this slot may not hold the weakest + value according to your comparator, in cases where + your comparator is not the primary one (ie, is only + used to break ties from the comparators before it).
  • {@link #compareBottom} Compare a new hit (docID) + against the "weakest" (bottom) entry in the queue.
  • {@link #copy} Installs a new hit into the + priority queue. The {@link FieldValueHitQueue} + calls this method when a new hit is competitive.
  • {@link #setNextReader} Invoked + when the search is switching to the next segment. + You may need to update internal state of the + comparator, for example retrieving new values from + the {@link FieldCache}.
  • {@link #value} Return the sort value stored in + the specified slot. This is only called at the end + of the search, in order to populate {@link + FieldDoc#fields} when returning the top results.
+ + NOTE: This API is experimental and might change in + incompatible ways in the next release. +
+
+ + Compare hit at slot1 with hit at slot2. + + + first slot to compare + + second slot to compare + + any N < 0 if slot2's value is sorted after + slot1, any N > 0 if the slot2's value is sorted before + slot1 and 0 if they are equal + + + + Set the bottom slot, ie the "weakest" (sorted last) + entry in the queue. When {@link #compareBottom} is + called, you should compare against this slot. This + will always be called before {@link #compareBottom}. + + + the currently weakest (sorted last) slot in the queue + + + + Compare the bottom of the queue with doc. This will + only invoked after setBottom has been called. This + should return the same result as {@link + #Compare(int,int)}} as if bottom were slot1 and the new + document were slot 2. + +

For a search that hits many results, this method + will be the hotspot (invoked by far the most + frequently).

+ +

+ that was hit + + any N < 0 if the doc's value is sorted after + the bottom entry (not competitive), any N > 0 if the + doc's value is sorted before the bottom entry and 0 if + they are equal. + +
+ + This method is called when a new hit is competitive. + You should copy any state associated with this document + that will be required for future comparisons, into the + specified slot. + + + which slot to copy the hit to + + docID relative to current reader + + + + Set a new Reader. All doc correspond to the current Reader. + + + current reader + + docBase of this reader + + IOException + IOException + + + Sets the Scorer to use in case a document's score is + needed. + + + Scorer instance that you should use to + obtain the current hit's score, if necessary. + + + + Return the actual value in the slot. + + + the value + + value in this slot upgraded to Comparable + + + + Parses field's values as byte (using {@link + FieldCache#getBytes} and sorts by ascending value + + + + Sorts by ascending docID + + + Parses field's values as double (using {@link + FieldCache#getDoubles} and sorts by ascending value + + + + Parses field's values as float (using {@link + FieldCache#getFloats} and sorts by ascending value + + + + Parses field's values as int (using {@link + FieldCache#getInts} and sorts by ascending value + + + + Parses field's values as long (using {@link + FieldCache#getLongs} and sorts by ascending value + + + + Sorts by descending relevance. NOTE: if you are + sorting only by descending relevance and then + secondarily by ascending docID, peformance is faster + using {@link TopScoreDocCollector} directly (which {@link + IndexSearcher#search} uses when no {@link Sort} is + specified). + + + + Parses field's values as short (using {@link + FieldCache#getShorts} and sorts by ascending value + + + + Sorts by a field's value using the Collator for a + given Locale. + + + + Sorts by field's natural String sort order, using + ordinals. This is functionally equivalent to {@link + StringValComparator}, but it first resolves the string + to their relative ordinal positions (using the index + returned by {@link FieldCache#getStringIndex}), and + does most comparisons using the ordinals. For medium + to large results, this comparator will be much faster + than {@link StringValComparator}. For very small + result sets it may be slower. + + + + Sorts by field's natural String sort order. All + comparisons are done using String.compareTo, which is + slow for medium to large result sets but possibly + very fast for very small results sets. + + + + A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum + score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. + This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be + combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost, + not the sum of the field scores (as BooleanQuery would give). + If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching + another gets a higher score than "albino" matching both fields. + To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in + each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery. 
+ The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that + include this term in only the best of those multiple fields, without confusing this with the better case of two different terms + in the multiple fields. + + + + Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. + the score of each non-maximum disjunct for a document is multiplied by this weight + and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that + 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique + word in the lower scored field (i.e., one that is not in any higher scored field. + + + + Creates a new DisjunctionMaxQuery + a Collection<Query> of all the disjuncts to add + + the weight to give to each matching non-maximum disjunct + + + + Add a subquery to this disjunction + the disjunct added + + + + Add a collection of disjuncts to this disjunction + via Iterable + + + + An Iterator<Query> over the disjuncts + + + Optimize our representation and our subqueries representations + the IndexReader we query + + an optimized copy of us (which may not be a copy if there is nothing to optimize) + + + + Create a shallow copy of us -- used in rewriting if necessary + a copy of us (but reuse, don't copy, our subqueries) + + + + Prettyprint us. + the field to which we are applied + + a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" + + + + Return true iff we represent the same query as o + another object + + true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us + + + + Compute a hash code for hashing us + the hash code + + + + Expert: the Weight for DisjunctionMaxQuery, used to + normalize, score and explain these queries. + +

NOTE: this API and implementation is subject to + change suddenly in the next release.

+

+
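A hedged sketch of the "albino elephant" pattern described above (field names and the 0.1 tie breaker are illustrative): one DisjunctionMaxQuery per term across the fields, with the terms combined in a BooleanQuery:
+            BooleanQuery bq = new BooleanQuery();
+            for (String word : new String[] {"albino", "elephant"}) {
+              DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.1f); // small tie breaker
+              dmq.add(new TermQuery(new Term("title", word)));
+              dmq.add(new TermQuery(new Term("body", word)));
+              bq.add(dmq, BooleanClause.Occur.SHOULD);
+            }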
+ + The Similarity implementation. + + + The Weights for our subqueries, in 1-1 correspondence with disjuncts + + + Expert: Default scoring implementation. + + + Implemented as + state.getBoost()*lengthNorm(numTerms), where + numTerms is {@link FieldInvertState#GetLength()} if {@link + #setDiscountOverlaps} is false, else it's {@link + FieldInvertState#GetLength()} - {@link + FieldInvertState#GetNumOverlap()}. + +

WARNING: This API is new and experimental, and may suddenly + change.

+

+
+ + Implemented as 1/sqrt(numTerms). + + + Implemented as 1/sqrt(sumOfSquaredWeights). + + + Implemented as sqrt(freq). + + + Implemented as 1 / (distance + 1). + + + Implemented as log(numDocs/(docFreq+1)) + 1. + + + Implemented as overlap / maxOverlap. + + + Determines whether overlap tokens (Tokens with + 0 position increment) are ignored when computing + norm. By default this is false, meaning overlap + tokens are counted just like non-overlap tokens. + +

WARNING: This API is new and experimental, and may suddenly + change.

+ +

+ + +
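As a hedged sketch of customizing the length normalization described above (the class name is made up), a subclass that ignores document length entirely:
+            public class NoLengthNormSimilarity extends DefaultSimilarity {
+              public float lengthNorm(String fieldName, int numTerms) {
+                return 1.0f; // replace the default 1/sqrt(numTerms)
+              }
+            }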
+ + + + + + Called once per field per document if term vectors + are enabled, to write the vectors to + RAMOutputStream, which is then quickly flushed to + * the real term vectors files in the Directory. + + + + Convenience class for holding TermVector information. + + + An IndexReader which reads indexes with multiple segments. + + + Construct reading the named set of readers. + + + This constructor is only used for {@link #Reopen()} + + + Version number when this IndexReader was opened. + + + Checks is the index is optimized (if it has a single segment and no deletions) + true if the index is optimized; false otherwise + + + + Tries to acquire the WriteLock on this directory. this method is only valid if this IndexReader is directory + owner. + + + StaleReaderException if the index has changed since this reader was opened + CorruptIndexException if the index is corrupt + Lucene.Net.Store.LockObtainFailedException + if another writer has this index open (write.lock could not be + obtained) + + IOException if there is a low-level IO error + + + + + + + Commit changes resulting from delete, undeleteAll, or setNorm operations +

+ If an exception is hit, then either no changes or all changes will have been committed to the index (transactional + semantics). + +

+ IOException if there is a low-level IO error +
+ + Returns the directory this index resides in. + + + Expert: return the IndexCommit that this reader has opened. +

+

WARNING: this API is new and experimental and may suddenly change.

+

+
+ + + + + + Optimized implementation. + + + Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as + a single {@link TermPositions}. + + + + + Creates a new MultipleTermPositions instance. + + + + + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + Not implemented. + UnsupportedOperationException + + + + false + + + + Access to the Fieldable Info file that describes document fields and whether or + not they are indexed. Each segment has a separate Fieldable Info file. Objects + of this class are thread-safe for multiple readers, but only one thread can + be adding documents at a time, with no other reader or writer threads + accessing this object. + + + + Construct a FieldInfos object using the directory and the name of the file + IndexInput + + The directory to open the IndexInput from + + The name of the file to open the IndexInput from in the Directory + + IOException + + + Returns a deep clone of this FieldInfos instance. + + + Adds field info for a Document. + + + Returns true if any fields do not omitTermFreqAndPositions + + + Add fields that are indexed. Whether they have termvectors has to be specified. + + + The names of the fields + + Whether the fields store term vectors or not + + true if positions should be stored. + + true if offsets should be stored + + + + Assumes the fields are not storing term vectors. + + + The names of the fields + + Whether the fields are indexed or not + + + + + + + Calls 5 parameter add with false for all TermVector parameters. + + + The name of the Fieldable + + true if the field is indexed + + + + + + Calls 5 parameter add with false for term vector positions and offsets. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. + + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + true if the norms for the indexed field should be omitted + + + + If the field is not yet known, adds it. If it is known, checks to make + sure that the isIndexed flag is the same as was given previously for this + field. If not - marks it as being indexed. Same goes for the TermVector + parameters. 
+ + + The name of the field + + true if the field is indexed + + true if the term vector should be stored + + true if the term vector with positions should be stored + + true if the term vector with offsets should be stored + + true if the norms for the indexed field should be omitted + + true if payloads should be stored for this field + + true if term freqs should be omitted for this field + + + + Return the fieldName identified by its number. + + + + + the fieldName or an empty string when the field + with the given number doesn't exist. + + + + Return the fieldinfo object referenced by the fieldNumber. + + + the FieldInfo object or null when the given fieldNumber + doesn't exist. + + + + Holds all per thread, per field state. + + + A Token's lexical type. The Default value is "word". + + + Returns this Token's lexical type. Defaults to "word". + + + Set the lexical type. + + + + + Removes words that are too long or too short from the stream. + + + + $Id: LengthFilter.java 807201 2009-08-24 13:22:34Z markrmiller $ + + + + Build a filter that removes words that are too long or too + short from the text. + + + + Returns the next input Token whose term() is the right len + + + This class converts alphabetic, numeric, and symbolic Unicode characters + which are not in the first 127 ASCII characters (the "Basic Latin" Unicode + block) into their ASCII equivalents, if one exists. + + Characters from the following Unicode blocks are converted; however, only + those characters with reasonable ASCII alternatives are converted: + + + + See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode + + The set of character conversions supported by this class is a superset of + those supported by Lucene's {@link ISOLatin1AccentFilter} which strips + accents from Latin1 characters. For example, 'À' will be replaced by + 'a'. + + + + Converts characters above ASCII to their ASCII equivalents. For example, + accents are removed from accented characters. + + The string to fold + + The number of characters in the input string + + + + Helper class for keeping Listss of Objects associated with keys. WARNING: THIS CLASS IS NOT THREAD SAFE + + + the backing store for this object + + + + direct access to the map backing this object. + + + + Adds val to the Set associated with key in the Map. If key is not + already in the map, a new Set will first be created. + + the size of the Set associated with key once val is added to it. + + + + Adds multiple vals to the Set associated with key in the Map. + If key is not + already in the map, a new Set will first be created. + + the size of the Set associated with key once val is added to it. + + + + Simple DocIdSet and DocIdSetIterator backed by a BitSet + + + This DocIdSet implementation is cacheable. + + + Returns the underlying BitSet. + + + use {@link #DocID()} instead. + + + + use {@link #NextDoc()} instead. + + + + use {@link #Advance(int)} instead. + + + + File-based {@link Directory} implementation that uses + mmap for reading, and {@link + SimpleFSDirectory.SimpleFSIndexOutput} for writing. + +

NOTE: memory mapping uses up a portion of the + virtual memory address space in your process equal to the + size of the file being mapped. Before using this class, + be sure you have plenty of virtual address space, e.g. by + using a 64 bit JRE, or a 32 bit JRE with indexes that are + guaranteed to fit within the address space. + On 32 bit platforms also consult {@link #setMaxChunkSize} + if you have problems with mmap failing because of fragmented + address space. If you get an OutOfMemoryException, it is recommended + to reduce the chunk size until it works. + +

Due to + this bug in Sun's JRE, MMapDirectory's {@link IndexInput#close} + is unable to close the underlying OS file handle. Only when GC + finally collects the underlying objects, which could be quite + some time later, will the file handle be closed. + +

This consumes additional transient disk space: on Windows, + attempts to delete or overwrite the files will result in an + exception; on other platforms, which typically have "delete on + last close" semantics, such operations will succeed, but the bytes + still consume space on disk. For many applications this + limitation is not a problem (e.g. if you have plenty of disk space, + and you don't rely on overwriting files on Windows) but it's still + an important limitation to be aware of. + +

This class supplies the workaround mentioned in the bug report + (disabled by default, see {@link #setUseUnmap}), which may fail on + non-Sun JVMs. It forcefully unmaps the buffer on close by using + an undocumented internal cleanup functionality. + {@link #UNMAP_SUPPORTED} is true, if the workaround + can be enabled (with no guarantees). +

+
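For illustration only (not part of the original library documentation): a minimal C# sketch of opening an index through MMapDirectory, assuming the (DirectoryInfo, LockFactory) constructor described below and a purely hypothetical index path.

    using System.IO;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    class MMapDirectoryExample {
        static void Main() {
            // Hypothetical path; any existing index directory works.
            var path = new DirectoryInfo(@"App_Data\Sites\My Site\Indexes\default");
            var dir = new MMapDirectory(path, null);  // null = default lock factory
            var reader = IndexReader.Open(dir, true); // read-only reader over the mmapped files
            System.Console.WriteLine(reader.NumDocs());
            reader.Close();                           // the OS file handle may only be freed later, at GC time (see above)
            dir.Close();
        }
    }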
+ + Create a new MMapDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new MMapDirectory for the named location. + + + the path of the directory + + the lock factory to use, or null for the default. + + IOException + + + Create a new MMapDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + Create a new MMapDirectory for the named location and the default lock factory. + + + the path of the directory + + IOException + + + + + + + true, if this platform supports unmapping mmaped files. + + + This method enables the workaround for unmapping the buffers + from address space after closing {@link IndexInput}, that is + mentioned in the bug report. This hack may fail on non-Sun JVMs. + It forcefully unmaps the buffer on close by using + an undocumented internal cleanup functionality. +

NOTE: Enabling this is completely unsupported + by Java and may lead to JVM crashes if IndexInput + is closed while another thread is still accessing it (SIGSEGV). +

+ IllegalArgumentException if {@link #UNMAP_SUPPORTED} + is false and the workaround cannot be enabled. + +
+ + Returns true if the unmap workaround is enabled. + + + Try to unmap the buffer; this method silently fails if the JVM + has no support for it. On Windows, this means + that mmapped files cannot be modified or deleted. + + + Sets the maximum chunk size (default is {@link Integer#MAX_VALUE} for + 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping. + Especially on 32 bit platforms, the address space can be very fragmented, + so large index files cannot be mapped. + Using a lower chunk size makes the directory implementation a little + bit slower (as the correct chunk must be resolved on each seek) + but the chance is higher that mmap does not fail. On 64 bit + Java platforms, this parameter should always be {@link Integer#MAX_VALUE}, + as the address space is big enough. + + + Returns the current mmap chunk size. + + + + Creates an IndexInput for the file with the given name. + + Creates an IndexOutput for the file with the given name. + + Expert: A Scorer for documents matching a Term. + + Construct a TermScorer. + + + The weight of the Term in the query. + + An iterator over the documents matching the Term. + + The Similarity implementation to be used for score + computations. + + The field norms of the document fields for the Term. + + + + use {@link #Score(Collector)} instead. + + + + use {@link #Score(Collector, int, int)} instead. + + + + use {@link #DocID()} instead. + + + + Advances to the next document matching the query.
+ The iterator over the matching documents is buffered using + {@link TermDocs#Read(int[],int[])}. + +
+ true iff there is another document matching the query. + + use {@link #NextDoc()} instead. + +
+ + Advances to the next document matching the query.
+ The iterator over the matching documents is buffered using + {@link TermDocs#Read(int[],int[])}. + +
+ the document matching the query or -1 if there are no more documents. + +
+ + Skips to the first match beyond the current whose document number is + greater than or equal to a given target.
+ The implementation uses {@link TermDocs#SkipTo(int)}. + +
+ The target document number. + + true iff there is such a match. + + use {@link #Advance(int)} instead. + +
+ + Advances to the first match beyond the current whose document number is + greater than or equal to a given target.
+ The implementation uses {@link TermDocs#SkipTo(int)}. + +
+ The target document number. + + the matching document or -1 if none exist. + +
+ + Returns an explanation of the score for a document. +
When this method is used, the {@link #Next()} method + and the {@link #Score(HitCollector)} method should not be used. +
+ The document number for the explanation. + +
+ + Returns a string representation of this TermScorer. + + + The BoostingTermQuery is very similar to the {@link Lucene.Net.Search.Spans.SpanTermQuery} except + that it factors in the value of the payload located at each of the positions where the + {@link Lucene.Net.Index.Term} occurs. +

+ In order to take advantage of this, you must override {@link Lucene.Net.Search.Similarity#ScorePayload(String, byte[],int,int)} + which returns 1 by default. +

+ Payload scores are averaged across term occurrences in the document. + +

+ + + + See {@link Lucene.Net.Search.Payloads.PayloadTermQuery} + +
+ + Implements parallel search over a set of Searchables. + +

Applications usually need only call the inherited {@link #Search(Query)} + or {@link #Search(Query,Filter)} methods. +

+
+ + Creates a searchable which searches searchables. + + TODO: parallelize this one too + + A search implementation which spawns a new thread for each + Searchable, waits for each search to complete and merges + the results back together. + + + A search implementation allowing sorting which spawns a new thread for each + Searchable, waits for each search to complete and merges + the results back together. + + + Lower-level search API. + +

{@link Collector#Collect(int)} is called for every matching document. + +

Applications should only use this if they need all of the + matching documents. The high-level search API ({@link + Searcher#Search(Query)}) is usually more efficient, as it skips + non-high-scoring hits. + +

+ to match documents + + if non-null, a bitset used to eliminate some documents + + to receive hits + + TODO: parallelize this one too + +
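A hedged C# sketch of the pattern described above (the ParallelMultiSearcher constructor and Search overload are assumed from these docs; the IndexSearcher usage matches the rest of this change):

    using Lucene.Net.Search;
    using Lucene.Net.Store;

    class ParallelSearchExample {
        static TopDocs SearchBoth(Directory dir1, Directory dir2, Query query) {
            var searchers = new Searchable[] {
                new IndexSearcher(dir1, true), // read-only searcher over the first index
                new IndexSearcher(dir2, true)  // read-only searcher over the second index
            };
            var multi = new ParallelMultiSearcher(searchers);
            return multi.Search(query, 10);    // each searchable runs in its own thread; results are merged
        }
    }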
+ + A thread subclass for searching a single searchable + + + Expert: obtains short field values from the + {@link Lucene.Net.Search.FieldCache FieldCache} + using getShorts() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if {@link #getValues} is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API. Alternatively, for + a short-term fix, you could wrap your ValueSource using + {@link MultiValueSource}, which costs more CPU per lookup + but will not consume double the FieldCache RAM.

+ + + +

Create a cached short field source with default string-to-short parser. +
+ + Create a cached short field source with a specific string-to-short parser. + + + Expert: obtains int field values from the + {@link Lucene.Net.Search.FieldCache FieldCache} + using getInts() and makes those values + available as other numeric types, casting as needed. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+ for requirements + on the field. + +

NOTE: with the switch in 2.9 to segment-based + searching, if {@link #getValues} is invoked with a + composite (multi-segment) reader, this can easily cause + double RAM usage for the values in the FieldCache. It's + best to switch your application to pass only atomic + (single segment) readers to this API. Alternatively, for + a short-term fix, you could wrap your ValueSource using + {@link MultiValueSource}, which costs more CPU per lookup + but will not consume double the FieldCache RAM.

+ + + +

Create a cached int field source with default string-to-int parser. +
+ + Create a cached int field source with a specific string-to-int parser. + + + Query that sets document score as a programmatic function of several (sub) scores: +
    +
  1. the score of its subQuery (any query)
  2. (optional) the score of its ValueSourceQuery (or queries). + For most simple/convenient use cases this query is likely to be a + {@link Lucene.Net.Search.Function.FieldScoreQuery FieldScoreQuery}
+ Subclasses can modify the computation by overriding {@link #getCustomScoreProvider}. + +

+ WARNING: The status of the Search.Function package is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. +

+
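A hedged C# sketch of the combination described above (the FieldScoreQuery type constant and the CustomScoreQuery constructor are assumed; "popularity" is a hypothetical untokenized numeric field):

    using Lucene.Net.Search;
    using Lucene.Net.Search.Function;

    class CustomScoreExample {
        static Query BoostByPopularity(Query subQuery) {
            // The value of the "popularity" field multiplies the subQuery score (default combination).
            var popularity = new FieldScoreQuery("popularity", FieldScoreQuery.Type.INT);
            return new CustomScoreQuery(subQuery, popularity);
        }
    }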
+ + Create a CustomScoreQuery over input subQuery. + the sub query whose score is being customized. Must not be null. + + + Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. + the sub query whose score is being customized. Must not be null. + + a value source query whose scores are used in the custom score + computation. For most simple/convenient use cases this would be a + {@link Lucene.Net.Search.Function.FieldScoreQuery FieldScoreQuery}. + This parameter is optional - it can be null or even an empty array. + + + Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. + the sub query whose score is being customized. Must not be null. + + value source queries whose scores are used in the custom score + computation. For most simple/convenient use cases these would be + {@link Lucene.Net.Search.Function.FieldScoreQuery FieldScoreQueries}. + This parameter is optional - it can be null or even an empty array. + + + Returns true if o is equal to this. + + Returns a hash code value for this object. + + Returns a {@link CustomScoreProvider} that calculates the custom scores + for the given {@link IndexReader}. The default implementation returns a default + implementation as specified in the docs of {@link CustomScoreProvider}. + @since 2.9.2 + + + + Compute a custom score by the subQuery score and a number of + ValueSourceQuery scores. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override {@link #getCustomScoreProvider} and return a subclass + of {@link CustomScoreProvider} for the given {@link IndexReader}. + see CustomScoreProvider#customScore(int,float,float[]) + + + + Compute a custom score by the subQuery score and the ValueSourceQuery score. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override {@link #getCustomScoreProvider} and return a subclass + of {@link CustomScoreProvider} for the given {@link IndexReader}. + @see CustomScoreProvider#customScore(int,float,float) + + + + Explain the custom score. + + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override {@link #getCustomScoreProvider} and return a subclass + of {@link CustomScoreProvider} for the given {@link IndexReader}. + + + + Explain the custom score. + The doc is relative to the current reader, which is + unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + Please override {@link #getCustomScoreProvider} and return a subclass + of {@link CustomScoreProvider} for the given {@link IndexReader}. + + + + Checks if this is strict custom scoring. + In strict custom scoring, the ValueSource part does not participate in weight normalization. + This may be useful when one wants full control over how scores are modified, and does + not care about normalizing by the ValueSource part. + One particular case where this is useful is for testing this query. +

+ Note: only has effect when the ValueSource part is not null. +

+
+ + Set the strict mode of this query. + The strict mode to set. + + + + + + A short name of this query, used in {@link #ToString(String)}. + + + + + Creates a new instance of the provider class for the given IndexReader. + + + + + * Compute a custom score by the subQuery score and a number of + ValueSourceQuery scores. +

+ Subclasses can override this method to modify the custom score. +

+ If your custom scoring is different than the default herein you + should override at least one of the two customScore() methods. + If the number of ValueSourceQueries is always < 2 it is + sufficient to override the other + {@link #customScore(int, float, float) customScore()} + method, which is simpler. +

+ The default computation herein is a multiplication of given scores: +

+                ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
+            
+
+ id of scored doc + score of that doc by the subQuery + scores of that doc by the ValueSourceQuery + custom score +
+ + + + Explain the custom score. + Whenever overriding {@link #customScore(int, float, float[])}, + this method should also be overridden to provide the correct explanation + for the part of the custom scoring. + + doc being explained + explanation for the sub-query part + explanation for the value source part + an explanation for the custom score + + + + Explain the custom score. + Whenever overriding {@link #customScore(int, float, float)}, + this method should also be overridden to provide the correct explanation + for the part of the custom scoring. + + + doc being explained + explanation for the sub-query part + explanation for the value source part + an explanation for the custom score + + + A scorer that applies a (callback) function on scores of the subQuery. + + + use {@link #NextDoc()} instead. + + + + use {@link #DocID()} instead. + + + + use {@link #Advance(int)} instead. + + + + Compares {@link Lucene.Net.Index.TermVectorEntry}s first by frequency and then by + the term (case-sensitive) + + + + + + Returns an array of TermVectorOffsetInfo in which the term is found. + + + The position in the array to get the offsets from + + An array of TermVectorOffsetInfo objects or the empty list + + + + + + Returns an array of positions in which the term is found. + Terms are identified by the index at which its number appears in the + term String array obtained from the indexOf method. + + + + Information about a segment such as it's name, directory, and files related + to the segment. + + *

NOTE: This API is new and still experimental + (subject to change suddenly in the next release)

+

+
+ + Copy everything from src SegmentInfo into our instance. + + + Construct a new SegmentInfo instance by reading a + previously saved SegmentInfo from input. + + + directory to load from + + format of the segments info file + + input handle to read segment info from + + + + Returns total size in bytes of all of files used by + this segment. + + + + Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX). + + + the field index to check + + + + Returns true if any fields in this segment have separate norms. + + + Increment the generation count for the norms file for + this field. + + + field whose norm file will be rewritten + + + + Get the file name for the norms file for this field. + + + field index + + + + Mark whether this segment is stored as a compound file. + + + true if this is a compound file; + else, false + + + + Returns true if this segment is stored as a compound + file; else, false. + + + + Save this segment's info. + + + Used for debugging + + + We consider another SegmentInfo instance equal if it + has the same dir and same name. + + + + This class tracks the number and position / offset parameters of terms + being added to the index. The information collected in this class is + also used to calculate the normalization factor for a field. + +

WARNING: This API is new and experimental, and may suddenly + change.

+

+
+ + Re-initialize the state, using this boost value. + boost value to use. + + + + Get the last processed term position. + the position + + + + Get total number of terms in this field. + the length + + + + Get the number of terms with positionIncrement == 0. + the numOverlap + + + + Get end offset of the last processed term. + the offset + + + + Get boost value. This is the cumulative product of + document boost and field boost for all field instances + sharing the same field name. + + the boost + + + + This is just a "splitter" class: it lets you wrap two + DocFieldConsumer instances as a single consumer. + + + + Provides support for converting longs to Strings, and back again. The strings + are structured so that lexicographic sorting order is preserved. + +

+ That is, if l1 is less than l2 for any two longs l1 and l2, then + NumberTools.longToString(l1) is lexicographically less than + NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) + +

+ This class handles all long values (unlike + {@link Lucene.Net.Documents.DateField}). + +

+ For new indexes use {@link NumericUtils} instead, which + provides a sortable binary representation (prefix encoded) of numeric + values. + To index and efficiently query numeric values use {@link NumericField} + and {@link NumericRangeQuery}. + This class is included for use with existing + indices and will be removed in a future release. + +
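A hedged C# sketch of the round trip described above (the .NET method names LongToString/StringToLong are assumed):

    using Lucene.Net.Documents;

    class NumberToolsExample {
        static void Main() {
            string a = NumberTools.LongToString(42L);
            string b = NumberTools.LongToString(1000L);
            // The encoded strings sort lexicographically in the same order as the original longs.
            System.Console.WriteLine(string.CompareOrdinal(a, b) < 0); // True
            System.Console.WriteLine(NumberTools.StringToLong(a));     // 42
        }
    }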
+ + Equivalent to longToString(Long.MIN_VALUE) + + + Equivalent to longToString(Long.MAX_VALUE) + + + The length of (all) strings returned by {@link #longToString} + + + Converts a long to a String suitable for indexing. + + + Converts a String that was returned by {@link #longToString} back to a + long. + + + IllegalArgumentException + if the input is null + + NumberFormatException + if the input does not parse (it was not a String returned by + longToString()). + + + + A WhitespaceTokenizer is a tokenizer that divides text at whitespace. + Adjacent sequences of non-Whitespace characters form tokens. + + + + Construct a new WhitespaceTokenizer. + + + Construct a new WhitespaceTokenizer using a given {@link AttributeSource}. + + + Construct a new WhitespaceTokenizer using a given {@link Lucene.Net.Util.AttributeSource.AttributeFactory}. + + + Collects only characters which do not satisfy + {@link Character#isWhitespace(char)}. + + + + An {@link Analyzer} that filters {@link LetterTokenizer} + with {@link LowerCaseFilter} + + + +

Implements {@link LockFactory} using native OS file + locks. Note that because this LockFactory relies on + java.nio.* APIs for locking, any problems with those APIs + will cause locking to fail. Specifically, on certain NFS + environments the java.nio.* locks will fail (the lock can + incorrectly be double acquired) whereas {@link + SimpleFSLockFactory} worked perfectly in those same + environments. For NFS based access to an index, it's + recommended that you try {@link SimpleFSLockFactory} + first and work around the one limitation that a lock file + could be left when the JVM exits abnormally.

+ +

The primary benefit of {@link NativeFSLockFactory} is + that lock files will be properly removed (by the OS) if + the JVM has an abnormal exit.

+ +

Note that, unlike {@link SimpleFSLockFactory}, the existence of + leftover lock files in the filesystem on exiting the JVM + is fine because the OS will free the locks held against + these files even though the files still remain.

+ +

If you suspect that this or any other LockFactory is + not working properly in your environment, you can easily + test it by using {@link VerifyingLockFactory}, {@link + LockVerifyServer} and {@link LockStressTest}.
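A hedged C# sketch of using this lock factory with an FSDirectory (the FSDirectory.Open overload taking a LockFactory is assumed; the path is hypothetical; the IndexWriter call matches DefaultIndexProvider elsewhere in this change):

    using System.IO;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Index;
    using Lucene.Net.Store;
    using Version = Lucene.Net.Util.Version;

    class NativeLockExample {
        static void Main() {
            var path = new DirectoryInfo(@"App_Data\Sites\My Site\Indexes\default"); // hypothetical path
            // No lock directory given: locks are created inside the index directory itself.
            var dir = FSDirectory.Open(path, new NativeFSLockFactory());
            var writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), true, IndexWriter.MaxFieldLength.UNLIMITED);
            writer.Close(); // the OS releases the native lock even if the process exits abnormally before this point
            dir.Close();
        }
    }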

+ +

+ + +
+ + Create a NativeFSLockFactory instance, with null (unset) + lock directory. When you pass this factory to a {@link FSDirectory} + subclass, the lock directory is automatically set to the + directory itself. Be sure to create one instance for each directory + you create! + + + Create a NativeFSLockFactory instance, storing lock + files into the specified lockDirName: + + + where lock files are created. + + + Create a NativeFSLockFactory instance, storing lock + files into the specified lockDir: + + + where lock files are created. + + + Create a NativeFSLockFactory instance, storing lock + files into the specified lockDir: + + + where lock files are created. + + + The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery + +

+ NOTE: This API is still experimental and subject to change. + + +

+
+ + + + + + + The bits for the Filter + + A List of {@link Lucene.Net.Search.SpanFilterResult.PositionInfo} objects + + Use {@link #SpanFilterResult(DocIdSet, List)} instead + + + + + The DocIdSet for the Filter + + A List of {@link Lucene.Net.Search.SpanFilterResult.PositionInfo} objects + + + + The first entry in the array corresponds to the first "on" bit. + Entries are increasing by document order + + A List of PositionInfo objects + + + + Use {@link #GetDocIdSet()} + + + + Returns the docIdSet + + + + A List of {@link Lucene.Net.Search.SpanFilterResult.StartEnd} objects + + + + + The end position of this match + + + + The Start position + The start position of this match + + + + Encapsulates sort criteria for returned hits. + +

The fields used to determine sort order must be carefully chosen. + Documents must contain a single term in such a field, + and the value of the term should indicate the document's relative position in + a given sort order. The field must be indexed, but should not be tokenized, + and does not need to be stored (unless you happen to want it back with the + rest of your document data). In other words: + +

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.NOT_ANALYZED));

+ + +

Valid Types of Values

+ +

There are four possible kinds of term values which may be put into + sorting fields: Integers, Longs, Floats, or Strings. Unless + {@link SortField SortField} objects are specified, the type of value + in the field is determined by parsing the first term in the field. + +

Integer term values should contain only digits and an optional + preceding negative sign. Values must be base 10 and in the range + Integer.MIN_VALUE and Integer.MAX_VALUE inclusive. + Documents which should appear first in the sort + should have low value integers, later documents high values + (i.e. the documents should be numbered 1..n where + 1 is the first and n the last). + +

Long term values should contain only digits and an optional + preceding negative sign. Values must be base 10 and in the range + Long.MIN_VALUE and Long.MAX_VALUE inclusive. + Documents which should appear first in the sort + should have low value integers, later documents high values. + +

Float term values should conform to values accepted by + {@link Float Float.valueOf(String)} (except that NaN + and Infinity are not supported). + Documents which should appear first in the sort + should have low values, later documents high values. + +

String term values can contain any valid String, but should + not be tokenized. The values are sorted according to their + {@link Comparable natural order}. Note that using this type + of term value has higher memory requirements than the + other types. + +

Object Reuse

+ +

One of these objects can be + used multiple times and the sort order changed between usages. + +

This class is thread safe. + +

Memory Usage

+ +

Sorting uses caches of term values maintained by the + internal HitQueue(s). The cache is static and contains an integer + or float array of length IndexReader.maxDoc() for each field + name for which a sort is performed. In other words, the size of the + cache in bytes is: + +

4 * IndexReader.maxDoc() * (# of different fields actually used to sort) + +

For String fields, the cache is larger: in addition to the + above array, the value of every term in the field is kept in memory. + If there are many unique terms in the field, this could + be quite large. + +

Note that the size of the cache is not affected by how many + fields are in the index and might be used to sort - only by + the ones actually used to sort a result set. + +

Created: Feb 12, 2004 10:53:57 AM + +

+ lucene 1.4 + + $Id: Sort.java 795179 2009-07-17 18:23:30Z mikemccand $ + +
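A hedged C# sketch of sorting on such a field (the SortField type constant and the Search(Query, Filter, int, Sort) overload are assumed; "byNumber" is the field from the example above):

    using Lucene.Net.Search;

    class SortExample {
        static TopDocs ByNumber(Searcher searcher, Query query) {
            // "byNumber" must be indexed, single-valued and not tokenized (see above).
            var sort = new Sort(new SortField("byNumber", SortField.INT, false)); // false = ascending
            return searcher.Search(query, null, 10, sort);
        }
    }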
+ + Represents sorting by computed relevance. Using this sort criteria returns + the same results as calling + {@link Searcher#Search(Query) Searcher#search()}without a sort criteria, + only with slightly more overhead. + + + + Represents sorting by index order. + + + Sorts by computed relevance. This is the same sort criteria as calling + {@link Searcher#Search(Query) Searcher#search()}without a sort criteria, + only with slightly more overhead. + + + + Sorts by the terms in field then by index order (document + number). The type of value in field is determined + automatically. + + + + + Please specify the type explicitly by + first creating a {@link SortField} and then use {@link + #Sort(SortField)} + + + + Sorts possibly in reverse by the terms in field then by + index order (document number). The type of value in field is + determined automatically. + + + + + Please specify the type explicitly by + first creating a {@link SortField} and then use {@link + #Sort(SortField)} + + + + Sorts in succession by the terms in each field. The type of value in + field is determined automatically. + + + + + Please specify the type explicitly by + first creating {@link SortField}s and then use {@link + #Sort(SortField[])} + + + + Sorts by the criteria in the given SortField. + + + Sorts in succession by the criteria in each SortField. + + + Sets the sort to the terms in field then by index order + (document number). + + Please specify the type explicitly by + first creating a {@link SortField} and then use {@link + #SetSort(SortField)} + + + + Sets the sort to the terms in field possibly in reverse, + then by index order (document number). + + Please specify the type explicitly by + first creating a {@link SortField} and then use {@link + #SetSort(SortField)} + + + + Sets the sort to the terms in each field in succession. + Please specify the type explicitly by + first creating {@link SortField}s and then use {@link + #SetSort(SortField[])} + + + + Sets the sort to the given criteria. + + + Sets the sort to the given criteria in succession. + + + Representation of the sort criteria. + Array of SortField objects used in this sort criteria + + + + Returns true if o is equal to this. + + + Returns a hash code value for this object. + + + Experimental class to get set of payloads for most standard Lucene queries. + Operates like Highlighter - IndexReader should only contain doc of interest, + best to use MemoryIndex. + +

+ + WARNING: The status of the Payloads feature is experimental. + The APIs introduced here might change in the future and will not be + supported anymore in such a case. + +

+
+ + that contains doc with payloads to extract + + + + Query should be rewritten for wild/fuzzy support. + + + + + payloads Collection + + IOException + + + This class is very similar to + {@link Lucene.Net.Search.Spans.SpanNearQuery} except that it factors + in the value of the payloads located at each of the positions where the + {@link Lucene.Net.Search.Spans.TermSpans} occurs. +

+ In order to take advantage of this, you must override + {@link Lucene.Net.Search.Similarity#ScorePayload(String, byte[],int,int)} + which returns 1 by default. +

+ Payload scores are aggregated using a pluggable {@link PayloadFunction}. + +

+ + +
+ + By default, uses the {@link PayloadFunction} to score the payloads, but + can be overridden to do other things. + + + The payloads + + The start position of the span being scored + + The end position of the span being scored + + + + + + + A {@link Filter} that only accepts numeric values within + a specified range. To use this, you must first index the + numeric values using {@link NumericField} (expert: {@link + NumericTokenStream}). + +

You create a new NumericRangeFilter with the static + factory methods, e.g.: + +

+            Filter f = NumericRangeFilter.newFloatRange("weight",
+            new Float(0.3f), new Float(0.10f),
+            true, true);
+            
+ + accepts all documents whose float valued "weight" field + ranges from 0.3 to 0.10, inclusive. + See {@link NumericRangeQuery} for details on how Lucene + indexes and searches numeric valued fields. + +

NOTE: This API is experimental and + might change in incompatible ways in the next + release. + +

+ 2.9 + + +
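A hedged C# sketch of the factory call shown above (the .NET method name NewFloatRange is assumed for newFloatRange):

    using Lucene.Net.Search;

    class NumericRangeFilterExample {
        static Filter WeightRange(float min, float max) {
            // Matches documents whose "weight" field, indexed with NumericField, lies in [min, max].
            return NumericRangeFilter.NewFloatRange("weight", min, max, true, true);
        }
    }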
+ + Factory that creates a NumericRangeFilter, that filters a long + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a long + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a int + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a int + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a double + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a double + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that filters a float + range using the given precisionStep. + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Factory that creates a NumericRangeFilter, that queries a float + range using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + You can have half-open ranges (which are in fact </≤ or >/≥ queries) + by setting the min or max value to null. By setting inclusive to false, it will + match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + + + + Returns the field name for this filter + + + Returns true if the lower endpoint is inclusive + + + Returns true if the upper endpoint is inclusive + + + Returns the lower value of this range filter + + + Returns the upper value of this range filter + + + An iterator over {@link Hits} that provides lazy fetching of each document. + {@link Hits#Iterator()} returns an instance of this class. Calls to {@link #next()} + return a {@link Hit} instance. 
+ + + Use {@link TopScoreDocCollector} and {@link TopDocs} instead. Hits will be removed in Lucene 3.0. + + + + Constructed from {@link Hits#Iterator()}. + + + true if current hit is less than the total number of {@link Hits}. + + + + Unsupported operation. + + + UnsupportedOperationException + + + Returns the total number of hits. + + + Returns a {@link Hit} instance representing the next hit in {@link Hits}. + + + Next {@link Hit}. + + + + This class wraps another ValueSource, but protects + against accidental double RAM usage in FieldCache when + a composite reader is passed to {@link #getValues}. + +

NOTE: this class adds a CPU penalty to every + lookup, as it must resolve the incoming document to the + right sub-reader using a binary search.

+ +

+ This class is temporary, to ease the + migration to segment-based searching. Please change your + code to not pass composite readers to these APIs. + +
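A hedged C# sketch of the wrapping suggested above (the MultiValueSource and IntFieldSource constructors are assumed; "popularity" is a hypothetical int field):

    using Lucene.Net.Search.Function;

    class MultiValueSourceExample {
        static ValueSource Wrap() {
            // Safe with a composite (multi-segment) reader, at the cost of one extra lookup per document.
            return new MultiValueSource(new IntFieldSource("popularity"));
        }
    }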
+ + Fills in no-term-vectors for all docs we haven't seen + since the last doc that had term vectors. + + + + This stores a monotonically increasing set of <Term, TermInfo> pairs in a + Directory. A TermInfos can be written once, in order. + + + + The file format version, a negative number. + + + Expert: The fraction of terms in the "dictionary" which should be stored + in RAM. Smaller values use more memory, but make searching slightly + faster, while larger values use less memory and make searching slightly + slower. Searching is typically not dominated by dictionary lookup, so + tweaking this is rarely useful. + + + + Expert: The fraction of {@link TermDocs} entries stored in skip tables, + used to accellerate {@link TermDocs#SkipTo(int)}. Larger values result in + smaller indexes, greater acceleration, but fewer accelerable cases, while + smaller values result in bigger indexes, less acceleration and more + accelerable cases. More detailed experiments would be useful here. + + + + Expert: The maximum number of skip levels. Smaller values result in + slightly smaller indexes, but slower skipping in big posting lists. + + + + Adds a new <fieldNumber, termBytes>, TermInfo> pair to the set. + Term must be lexicographically greater than all previous Terms added. + TermInfo pointers must be positive and greater than all previous. + + + + Called to complete TermInfos creation. + + + This stores a monotonically increasing set of <Term, TermInfo> pairs in a + Directory. Pairs are accessed either by Term or by ordinal position the + set. + + + + Returns the number of term/value pairs in the set. + + + Returns the offset of the greatest index entry which is less than or equal to term. + + + Returns the TermInfo for a Term in the set, or null. + + + Returns the TermInfo for a Term in the set, or null. + + + Returns the position of a Term in the set or -1. + + + Returns an enumeration of all the Terms and TermInfos in the set. + + + Returns an enumeration of terms starting at or after the named term. + + + Per-thread resources managed by ThreadLocal + + + Increments the enumeration to the next element. True if one exists. + + + Optimized scan, without allocating new terms. + Return number of invocations to next(). + + + + Returns the current Term in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Returns the previous Term enumerated. Initially null. + + + Returns the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Sets the argument to the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Returns the docFreq from the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time. + + + + Closes the enumeration to further activity, freeing resources. + + + Adds a new doc in this term. If this returns null + then we just skip consuming positions/payloads. + + + + Called when we are done adding docs to this term + + + Filters {@link StandardTokenizer} with {@link StandardFilter}, + {@link LowerCaseFilter} and {@link StopFilter}, using a list of English stop + words. + + +

+ You must specify the required {@link Version} compatibility when creating + StandardAnalyzer: +

+ +
+ $Id: StandardAnalyzer.java 829134 2009-10-23 17:18:53Z mikemccand $ + +
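For illustration, a version-pinned construction in C#; this is how DefaultIndexProvider builds its analyzer elsewhere in this change:

    using Lucene.Net.Analysis.Standard;
    using Version = Lucene.Net.Util.Version;

    class AnalyzerExample {
        static StandardAnalyzer Create() {
            // LUCENE_29 pins tokenization and stop-word behaviour to the 2.9 rules.
            return new StandardAnalyzer(Version.LUCENE_29);
        }
    }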
+ + Default maximum allowed token length + + + Specifies whether deprecated acronyms should be replaced with HOST type. + This is false by default to support backward compatibility. + + + this should be removed in the next release (3.0). + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + + + true if new instances of StandardTokenizer will + replace mischaracterized acronyms + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + This will be removed (hardwired to true) in 3.0 + + + + + Set to true to have new + instances of StandardTokenizer replace mischaracterized + acronyms by default. Set to false to preserve the + previous (before 2.4) buggy behavior. Alternatively, + set the system property + Lucene.Net.Analysis.Standard.StandardAnalyzer.replaceInvalidAcronym + to false. + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + This will be removed (hardwired to true) in 3.0 + + + + An array containing some common English words that are usually not + useful for searching. + + Use {@link #STOP_WORDS_SET} instead + + + + An unmodifiable set containing some common English words that are usually not + useful for searching. + + + + Builds an analyzer with the default stop words ({@link + #STOP_WORDS_SET}). + + Use {@link #StandardAnalyzer(Version)} instead. + + + + Builds an analyzer with the default stop words ({@link + #STOP_WORDS}). + + Lucene version to match See {@link + above} + + + + Builds an analyzer with the given stop words. + Use {@link #StandardAnalyzer(Version, Set)} + instead + + + + Builds an analyzer with the given stop words. + Lucene version to match See {@link + above} + + stop words + + + + Builds an analyzer with the given stop words. + Use {@link #StandardAnalyzer(Version, Set)} instead + + + + Builds an analyzer with the stop words from the given file. + + + Use {@link #StandardAnalyzer(Version, File)} + instead + + + + Builds an analyzer with the stop words from the given file. + + + Lucene version to match See {@link + above} + + File to read stop words from + + + + Builds an analyzer with the stop words from the given reader. + + + Use {@link #StandardAnalyzer(Version, Reader)} + instead + + + + Builds an analyzer with the stop words from the given reader. 
+ + + Lucene version to match See {@link + above} + + Reader to read stop words from + + + + + Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + Remove in 3.X and make true the only valid value + + + + The stopwords to use + + Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + Remove in 3.X and make true the only valid value + + + + The stopwords to use + + Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + Remove in 3.X and make true the only valid value + + + + + The stopwords to use + + Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + Remove in 3.X and make true the only valid value + + + + The stopwords to use + + Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + + Remove in 3.X and make true the only valid value + + + + Constructs a {@link StandardTokenizer} filtered by a {@link + StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. + + + + Set maximum allowed token length. If a token is seen + that exceeds this length then it is discarded. This + setting only takes effect the next time tokenStream or + reusableTokenStream is called. + + + + + + + + Use {@link #tokenStream} instead + + + + + true if this Analyzer is replacing mischaracterized acronyms in the StandardTokenizer + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + This will be removed (hardwired to true) in 3.0 + + + + + Set to true if this Analyzer is replacing mischaracterized acronyms in the StandardTokenizer + + See https://issues.apache.org/jira/browse/LUCENE-1068 + + This will be removed (hardwired to true) in 3.0 + + +
+
diff --git a/src/Orchard.Core.Tests/Indexing/DefaultIndexProviderTests.cs b/src/Orchard.Core.Tests/Indexing/DefaultIndexProviderTests.cs new file mode 100644 index 000000000..0c983332d --- /dev/null +++ b/src/Orchard.Core.Tests/Indexing/DefaultIndexProviderTests.cs @@ -0,0 +1,146 @@ +using System; +using System.IO; +using System.Linq; +using Autofac; +using NUnit.Framework; +using Orchard.Environment.Configuration; +using Orchard.FileSystems.AppData; +using Orchard.Indexing; +using Orchard.Core.Indexing.Lucene; + +namespace Orchard.Tests.Indexing { + public class DefaultIndexProviderTests { + private IContainer _container; + private IIndexProvider _provider; + private IAppDataFolder _appDataFolder; + private ShellSettings _shellSettings; + private readonly string _basePath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + + [TestFixtureTearDown] + public void Clean() { + Directory.Delete(_basePath, true); + } + + [SetUp] + public void Setup() { + if (Directory.Exists(_basePath)) { + Directory.Delete(_basePath, true); + } + Directory.CreateDirectory(_basePath); + + _appDataFolder = new AppDataFolder(); + _appDataFolder.SetBasePath(_basePath); + + var builder = new ContainerBuilder(); + builder.RegisterType().As(); + builder.RegisterInstance(_appDataFolder).As(); + + // setting up a ShellSettings instance + _shellSettings = new ShellSettings { Name = "My Site" }; + builder.RegisterInstance(_shellSettings).As(); + + _container = builder.Build(); + _provider = _container.Resolve(); + } + + private string[] Indexes() { + return new DirectoryInfo(Path.Combine(_basePath, "Sites", "My Site", "Indexes")).GetDirectories().Select(d => d.Name).ToArray(); + } + + [Test] + public void IndexProviderShouldCreateNewIndex() { + Assert.That(Indexes().Length, Is.EqualTo(0)); + + _provider.CreateIndex("default"); + Assert.That(Indexes().Length, Is.EqualTo(1)); + } + + [Test] + public void IndexProviderShouldOverwriteAlreadyExistingIndex() { + _provider.CreateIndex("default"); + _provider.CreateIndex("default"); + } + + [Test] + public void IndexProviderShouldDeleteExistingIndex() { + Assert.That(Indexes().Length, Is.EqualTo(0)); + + _provider.CreateIndex("default"); + Assert.That(Indexes().Length, Is.EqualTo(1)); + + _provider.DeleteIndex("default"); + Assert.That(Indexes().Length, Is.EqualTo(0)); + } + + [Test] + public void IndexProviderShouldListExistingIndexes() { + Assert.That(Indexes().Length, Is.EqualTo(0)); + + _provider.CreateIndex("default"); + Assert.That(Indexes().Length, Is.EqualTo(1)); + Assert.That(Indexes()[0], Is.EqualTo("default")); + + _provider.CreateIndex("foo"); + Assert.That(Indexes().Length, Is.EqualTo(2)); + } + + [Test] + public void ANewIndexShouldBeEmpty() { + _provider.CreateIndex("default"); + var searchBuilder = _provider.CreateSearchBuilder("default"); + var hits = searchBuilder.Search(); + + Assert.That(hits.Count(), Is.EqualTo(0)); + } + + [Test] + public void DocumentsShouldBeSearchableById() { + _provider.CreateIndex("default"); + + _provider.Store("default", _provider.New(42)); + + var searchBuilder = _provider.CreateSearchBuilder("default"); + + var hit = searchBuilder.Get(42); + Assert.IsNotNull(hit); + Assert.That(hit.Id, Is.EqualTo(42)); + + hit = searchBuilder.Get(1); + Assert.IsNull(hit); + } + + [Test] + public void PropertiesShouldNotBeLost() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(42).Add("prop1", "value1")); + + var hit = _provider.CreateSearchBuilder("default").Get(42); + + Assert.IsNotNull(hit); + 
Assert.That(hit.Id, Is.EqualTo(42)); + Assert.That(hit.GetString("prop1"), Is.EqualTo("value1")); + + } + + [Test] + public void ShouldHandleMultipleIndexes() { + _provider.CreateIndex("default1"); + _provider.Store("default1", _provider.New(1)); + + _provider.CreateIndex("default2"); + _provider.Store("default2", _provider.New(2)); + + _provider.CreateIndex("default3"); + _provider.Store("default3", _provider.New(3)); + + Assert.IsNotNull(_provider.CreateSearchBuilder("default1").Get(1)); + Assert.IsNotNull(_provider.CreateSearchBuilder("default2").Get(2)); + Assert.IsNotNull(_provider.CreateSearchBuilder("default3").Get(3)); + + Assert.IsNull(_provider.CreateSearchBuilder("default1").Get(2)); + Assert.IsNull(_provider.CreateSearchBuilder("default2").Get(3)); + Assert.IsNull(_provider.CreateSearchBuilder("default3").Get(1)); + + } + } +} diff --git a/src/Orchard.Core.Tests/Indexing/DefaultSearchBuilderTests.cs b/src/Orchard.Core.Tests/Indexing/DefaultSearchBuilderTests.cs new file mode 100644 index 000000000..9ffa62d6c --- /dev/null +++ b/src/Orchard.Core.Tests/Indexing/DefaultSearchBuilderTests.cs @@ -0,0 +1,181 @@ +using System; +using System.IO; +using System.Linq; +using Autofac; +using NUnit.Framework; +using Orchard.Environment.Configuration; +using Orchard.FileSystems.AppData; +using Orchard.Indexing; +using Orchard.Core.Indexing.Lucene; + +namespace Orchard.Tests.Indexing { + public class DefaultSearchBuilderTests { + private IContainer _container; + private IIndexProvider _provider; + private IAppDataFolder _appDataFolder; + private ShellSettings _shellSettings; + private readonly string _basePath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + + [TestFixtureTearDown] + public void Clean() { + Directory.Delete(_basePath, true); + } + + [SetUp] + public void Setup() { + if (Directory.Exists(_basePath)) { + Directory.Delete(_basePath, true); + } + Directory.CreateDirectory(_basePath); + + + _appDataFolder = new AppDataFolder(); + _appDataFolder.SetBasePath(_basePath); + + var builder = new ContainerBuilder(); + builder.RegisterType().As(); + builder.RegisterInstance(_appDataFolder).As(); + + // setting up a ShellSettings instance + _shellSettings = new ShellSettings { Name = "My Site" }; + builder.RegisterInstance(_shellSettings).As(); + + _container = builder.Build(); + _provider = _container.Resolve(); + } + + private ISearchBuilder _searchBuilder { get { return _provider.CreateSearchBuilder("default"); } } + + [Test] + public void SearchTermsShouldBeFoundInMultipleFields() { + _provider.CreateIndex("default"); + _provider.Store("default", + _provider.New(42) + .Add("title", "title1 title2 title3") + .Add("date", new DateTime(2010, 05, 28, 14, 13, 56, 123)) + ); + + Assert.IsNotNull(_provider.CreateSearchBuilder("default").Get(42)); + + Assert.IsNotNull(_provider.CreateSearchBuilder("default").WithField("title", "title1").Search().FirstOrDefault()); + Assert.IsNotNull(_provider.CreateSearchBuilder("default").WithField("title", "title2").Search().FirstOrDefault()); + Assert.IsNotNull(_provider.CreateSearchBuilder("default").WithField("title", "title3").Search().FirstOrDefault()); + Assert.IsNull(_provider.CreateSearchBuilder("default").WithField("title", "title4").Search().FirstOrDefault()); + Assert.IsNotNull(_provider.CreateSearchBuilder("default").WithField("title", "title").Search().FirstOrDefault()); + + } + + [Test] + public void ShouldSearchById() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(1)); + 
_provider.Store("default", _provider.New(2)); + _provider.Store("default", _provider.New(3)); + + + Assert.That(_searchBuilder.Get(1).Id, Is.EqualTo(1)); + Assert.That(_searchBuilder.Get(2).Id, Is.EqualTo(2)); + Assert.That(_searchBuilder.Get(3).Id, Is.EqualTo(3)); + } + + [Test] + public void ShouldSearchWithField() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(1).Add("title", "cat")); + _provider.Store("default", _provider.New(2).Add("title", "dog")); + _provider.Store("default", _provider.New(3).Add("title", "cat")); + + + Assert.That(_searchBuilder.WithField("title", "cat").Search().Count(), Is.EqualTo(2)); + Assert.That(_searchBuilder.WithField("title", "cat").Search().Any(hit => new[] { 1, 3 }.Contains(hit.Id)), Is.True); + + } + + [Test] + public void ShouldCountResultsOnly() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(1).Add("title", "cat")); + _provider.Store("default", _provider.New(2).Add("title", "dog")); + _provider.Store("default", _provider.New(3).Add("title", "cat")); + + Assert.That(_searchBuilder.WithField("title", "dog").Count(), Is.EqualTo(1)); + Assert.That(_searchBuilder.WithField("title", "cat").Count(), Is.EqualTo(2)); + Assert.That(_searchBuilder.WithField("title", "c").Count(), Is.EqualTo(2)); + + } + + [Test] + public void ShouldFilterByDate() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(1).Add("date", new DateTime(2010, 05, 28, 12, 30, 15))); + _provider.Store("default", _provider.New(2).Add("date", new DateTime(2010, 05, 28, 12, 30, 30))); + _provider.Store("default", _provider.New(3).Add("date", new DateTime(2010, 05, 28, 12, 30, 45))); + + Assert.That(_searchBuilder.After("date", new DateTime(2010, 05, 28, 12, 30, 15)).Count(), Is.EqualTo(3)); + Assert.That(_searchBuilder.Before("date", new DateTime(2010, 05, 28, 12, 30, 45)).Count(), Is.EqualTo(3)); + Assert.That(_searchBuilder.After("date", new DateTime(2010, 05, 28, 12, 30, 15)).Before("date", new DateTime(2010, 05, 28, 12, 30, 45)).Count(), Is.EqualTo(3)); + Assert.That(_searchBuilder.After("date", new DateTime(2010, 05, 28, 12, 30, 16)).Before("date", new DateTime(2010, 05, 28, 12, 30, 44)).Count(), Is.EqualTo(1)); + Assert.That(_searchBuilder.After("date", new DateTime(2010, 05, 28, 12, 30, 46)).Count(), Is.EqualTo(0)); + Assert.That(_searchBuilder.Before("date", new DateTime(2010, 05, 28, 12, 30, 1)).Count(), Is.EqualTo(0)); + } + + [Test] + public void ShouldSliceResults() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(1)); + _provider.Store("default", _provider.New(22)); + _provider.Store("default", _provider.New(333)); + _provider.Store("default", _provider.New(4444)); + _provider.Store("default", _provider.New(55555)); + + + Assert.That(_searchBuilder.Count(), Is.EqualTo(5)); + Assert.That(_searchBuilder.Slice(0, 3).Count(), Is.EqualTo(3)); + Assert.That(_searchBuilder.Slice(1, 3).Count(), Is.EqualTo(3)); + Assert.That(_searchBuilder.Slice(3, 3).Count(), Is.EqualTo(2)); + + // Count() and Search() should return the same results + Assert.That(_searchBuilder.Search().Count(), Is.EqualTo(5)); + Assert.That(_searchBuilder.Slice(0, 3).Search().Count(), Is.EqualTo(3)); + Assert.That(_searchBuilder.Slice(1, 3).Search().Count(), Is.EqualTo(3)); + Assert.That(_searchBuilder.Slice(3, 3).Search().Count(), Is.EqualTo(2)); + } + + [Test] + public void ShouldSortByRelevance() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(1).Add("body", 
"michaelson is in the kitchen")); + _provider.Store("default", _provider.New(2).Add("body", "michael as a cousin named michael")); + _provider.Store("default", _provider.New(3).Add("body", "speak inside the mic")); + _provider.Store("default", _provider.New(4).Add("body", "a dog is pursuing a cat")); + _provider.Store("default", _provider.New(5).Add("body", "the elephant can't catch up the dog")); + + var michael = _searchBuilder.WithField("body", "mic").Search().ToList(); + Assert.That(michael.Count(), Is.EqualTo(3)); + Assert.That(michael[0].Score >= michael[1].Score, Is.True); + + // Sorting on score is always descending + michael = _searchBuilder.WithField("body", "mic").Ascending().Search().ToList(); + Assert.That(michael.Count(), Is.EqualTo(3)); + Assert.That(michael[0].Score >= michael[1].Score, Is.True); + } + + [Test] + public void ShouldSortByDate() { + _provider.CreateIndex("default"); + _provider.Store("default", _provider.New(1).Add("date", new DateTime(2010, 05, 28, 12, 30, 15))); + _provider.Store("default", _provider.New(2).Add("date", new DateTime(2010, 05, 28, 12, 30, 30))); + _provider.Store("default", _provider.New(3).Add("date", new DateTime(2010, 05, 28, 12, 30, 45))); + + var date = _searchBuilder.SortBy("date").Search().ToList(); + Assert.That(date.Count(), Is.EqualTo(3)); + Assert.That(date[0].GetDateTime("date") > date[1].GetDateTime("date"), Is.True); + Assert.That(date[1].GetDateTime("date") > date[2].GetDateTime("date"), Is.True); + + date = _searchBuilder.SortBy("date").Ascending().Search().ToList(); + Assert.That(date.Count(), Is.EqualTo(3)); + Assert.That(date[0].GetDateTime("date") < date[1].GetDateTime("date"), Is.True); + Assert.That(date[1].GetDateTime("date") < date[2].GetDateTime("date"), Is.True); + } + } +} diff --git a/src/Orchard.Core.Tests/Orchard.Core.Tests.csproj b/src/Orchard.Core.Tests/Orchard.Core.Tests.csproj index 2450e726b..307f9ed26 100644 --- a/src/Orchard.Core.Tests/Orchard.Core.Tests.csproj +++ b/src/Orchard.Core.Tests/Orchard.Core.Tests.csproj @@ -104,6 +104,8 @@ + + diff --git a/src/Orchard.Web/Core/Common/Handlers/BodyAspectHandler.cs b/src/Orchard.Web/Core/Common/Handlers/BodyAspectHandler.cs index 1aebd14b1..8368eb77b 100644 --- a/src/Orchard.Web/Core/Common/Handlers/BodyAspectHandler.cs +++ b/src/Orchard.Web/Core/Common/Handlers/BodyAspectHandler.cs @@ -8,6 +8,10 @@ namespace Orchard.Core.Common.Handlers { public class BodyAspectHandler : ContentHandler { public BodyAspectHandler(IRepository bodyRepository) { Filters.Add(StorageFilter.For(bodyRepository)); + + OnIndexing((context, bodyAspect) => { + context.IndexDocument.Add("body", bodyAspect.Record.Text); + }); } } } \ No newline at end of file diff --git a/src/Orchard.Web/Core/Indexing/Lucene/DefaultIndexDocument.cs b/src/Orchard.Web/Core/Indexing/Lucene/DefaultIndexDocument.cs new file mode 100644 index 000000000..b170f28a3 --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Lucene/DefaultIndexDocument.cs @@ -0,0 +1,101 @@ +using System; +using System.Collections.Generic; +using Lucene.Net.Documents; +using Orchard.Indexing; + +namespace Orchard.Core.Indexing.Lucene { + + public class DefaultIndexDocument : IIndexDocument { + + public List Fields { get; private set; } + private AbstractField _previousField; + public int Id { get; private set; } + + public DefaultIndexDocument(int documentId) { + Fields = new List(); + SetContentItemId(documentId); + } + + public IIndexDocument Add(string name, string value) { + AppendPreviousField(); + _previousField = new Field(name, value, 
Field.Store.YES, Field.Index.ANALYZED); + return this; + } + + public IIndexDocument Add(string name, DateTime value) { + AppendPreviousField(); + _previousField = new Field(name, DateTools.DateToString(value, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED); + return this; + } + public IIndexDocument Add(string name, int value) { + AppendPreviousField(); + _previousField = new NumericField(name, Field.Store.YES, true).SetIntValue(value); + return this; + } + + public IIndexDocument Add(string name, bool value) { + AppendPreviousField(); + _previousField = new Field(name, value.ToString().ToLower(), Field.Store.YES, Field.Index.NOT_ANALYZED); + return this; + } + + public IIndexDocument Add(string name, float value) { + AppendPreviousField(); + _previousField = new NumericField(name, Field.Store.YES, true).SetFloatValue(value); + return this; + } + + public IIndexDocument Add(string name, object value) { + AppendPreviousField(); + _previousField = new Field(name, value.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED); + return this; + } + + public IIndexDocument Store(bool store) { + EnsurePreviousField(); + if(store != _previousField.IsStored()) { + var index = _previousField.IsTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED; + _previousField = new Field(_previousField.Name(), _previousField.StringValue(), store ? Field.Store.YES : Field.Store.NO, index); + } + return this; + } + + public IIndexDocument Analyze(bool analyze) { + EnsurePreviousField(); + if (_previousField.IsTokenized() == analyze) { + return this; + } + + var index = analyze ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED; + var store = _previousField.IsStored() ? Field.Store.YES : Field.Store.NO; + _previousField = new Field(_previousField.Name(), _previousField.StringValue(), store, index); + return this; + } + + public IIndexDocument SetContentItemId(int id) { + Id = id; + Fields.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); + return this; + } + + private void AppendPreviousField() { + if (_previousField == null) { + return; + } + + Fields.Add(_previousField); + _previousField = null; + } + + + public void PrepareForIndexing() { + AppendPreviousField(); + } + + private void EnsurePreviousField() { + if(_previousField == null) { + throw new ApplicationException("Operation can't be applied in this context."); + } + } + } +} \ No newline at end of file diff --git a/src/Orchard.Web/Core/Indexing/Lucene/DefaultIndexProvider.cs b/src/Orchard.Web/Core/Indexing/Lucene/DefaultIndexProvider.cs new file mode 100644 index 000000000..d7df669a5 --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Lucene/DefaultIndexProvider.cs @@ -0,0 +1,128 @@ +using System; +using System.IO; +using Lucene.Net.Analysis; +using Lucene.Net.Analysis.Standard; +using Lucene.Net.Documents; +using Lucene.Net.Index; +using Lucene.Net.Store; +using Orchard.Environment.Configuration; +using Orchard.FileSystems.AppData; +using Orchard.Indexing; +using Directory = Lucene.Net.Store.Directory; +using Version = Lucene.Net.Util.Version; +using Orchard.Logging; + +namespace Orchard.Core.Indexing.Lucene { + /// + /// Represents the default implementation of an IIndexProvider based on Lucene + /// + public class DefaultIndexProvider : IIndexProvider { + private readonly IAppDataFolder _appDataFolder; + private readonly ShellSettings _shellSettings; + public static readonly Version LuceneVersion = Version.LUCENE_29; + private readonly Analyzer _analyzer = new StandardAnalyzer(LuceneVersion); 
+        private readonly string _basePath;
+
+        public ILogger Logger { get; set; }
+
+        public DefaultIndexProvider(IAppDataFolder appDataFolder, ShellSettings shellSettings) {
+            _appDataFolder = appDataFolder;
+            _shellSettings = shellSettings;
+
+            // TODO: (sebros) Find a common way to get where tenant's specific files should go. "Sites/Tenant" is hard coded in multiple places
+            _basePath = Path.Combine("Sites", _shellSettings.Name, "Indexes");
+
+            Logger = NullLogger.Instance;
+
+            // Ensures the directory exists
+            var directory = new DirectoryInfo(_appDataFolder.MapPath(_basePath));
+            if(!directory.Exists) {
+                directory.Create();
+            }
+        }
+
+        protected virtual Directory GetDirectory(string indexName) {
+            var directoryInfo = new DirectoryInfo(_appDataFolder.MapPath(Path.Combine(_basePath, indexName)));
+            return FSDirectory.Open(directoryInfo);
+        }
+
+        private static Document CreateDocument(DefaultIndexDocument indexDocument) {
+            var doc = new Document();
+
+            indexDocument.PrepareForIndexing();
+            foreach(var field in indexDocument.Fields) {
+                doc.Add(field);
+            }
+            return doc;
+        }
+
+        public bool Exists(string indexName) {
+            return new DirectoryInfo(_appDataFolder.MapPath(Path.Combine(_basePath, indexName))).Exists;
+        }
+
+        public void CreateIndex(string indexName) {
+            var writer = new IndexWriter(GetDirectory(indexName), _analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
+            writer.Close();
+
+            Logger.Information("Index [{0}] created", indexName);
+        }
+
+        public void DeleteIndex(string indexName) {
+            new DirectoryInfo(_appDataFolder.MapPath(Path.Combine(_basePath, indexName))).Delete(true);
+        }
+
+        public void Store(string indexName, IIndexDocument indexDocument) {
+            Store(indexName, (DefaultIndexDocument)indexDocument);
+        }
+
+        public void Store(string indexName, DefaultIndexDocument indexDocument) {
+            var writer = new IndexWriter(GetDirectory(indexName), _analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
+
+            try {
+                var doc = CreateDocument(indexDocument);
+                writer.AddDocument(doc);
+                Logger.Debug("Document [{0}] indexed", indexDocument.Id);
+            }
+            catch ( Exception ex ) {
+                Logger.Error(ex, "An unexpected error occurred while adding the document [{0}] to the index [{1}].", indexDocument.Id, indexName);
+            }
+            finally {
+                writer.Close();
+            }
+        }
+
+        public void Delete(string indexName, int id) {
+            var reader = IndexReader.Open(GetDirectory(indexName), false);
+
+            try {
+                var term = new Term("id", id.ToString());
+                // DeleteDocuments returns the number of documents that were removed
+                if ( reader.DeleteDocuments(term) == 0 ) {
+                    Logger.Error("The document [{0}] could not be removed from the index [{1}]", id, indexName);
+                }
+                else {
+                    Logger.Debug("Document [{0}] removed from index", id);
+                }
+            }
+            catch ( Exception ex ) {
+                Logger.Error(ex, "An unexpected error occurred while removing the document [{0}] from the index [{1}].", id, indexName);
+            }
+            finally {
+                reader.Close();
+            }
+        }
+
+        public IIndexDocument New(int documentId) {
+            return new DefaultIndexDocument(documentId);
+        }
+
+        public ISearchBuilder CreateSearchBuilder(string indexName) {
+            return new DefaultSearchBuilder(GetDirectory(indexName));
+        }
+
+        public IIndexDocument Get(string indexName, int id) {
+            throw new NotImplementedException();
+        }
+    }
+}
diff --git a/src/Orchard.Web/Core/Indexing/Lucene/DefaultSearchBuilder.cs b/src/Orchard.Web/Core/Indexing/Lucene/DefaultSearchBuilder.cs
new file mode 100644
index 000000000..c73192c52
--- /dev/null
+++ b/src/Orchard.Web/Core/Indexing/Lucene/DefaultSearchBuilder.cs
@@ -0,0 +1,194 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Orchard.Logging;
+using Lucene.Net.Documents;
+using Orchard.Indexing;
+
+namespace Orchard.Core.Indexing.Lucene {
+    public class DefaultSearchBuilder : ISearchBuilder {
+
+        private const int MaxResults = Int16.MaxValue;
+
+        private readonly Directory _directory;
+
+        private readonly Dictionary<string, Query[]> _fields;
+        private int _count;
+        private int _skip;
+        private readonly Dictionary<string, DateTime> _before;
+        private readonly Dictionary<string, DateTime> _after;
+        private string _sort;
+        private bool _sortDescending;
+
+        public ILogger Logger { get; set; }
+
+        public DefaultSearchBuilder(Directory directory) {
+            _directory = directory;
+            Logger = NullLogger.Instance;
+
+            _count = MaxResults;
+            _skip = 0;
+            _before = new Dictionary<string, DateTime>();
+            _after = new Dictionary<string, DateTime>();
+            _fields = new Dictionary<string, Query[]>();
+            _sort = String.Empty;
+            _sortDescending = true;
+        }
+
+        public ISearchBuilder Parse(string query) {
+            // TODO: the raw query string is currently ignored
+            return this;
+        }
+
+        public ISearchBuilder WithField(string field, string value) {
+            return WithField(field, value, true);
+        }
+
+        public ISearchBuilder WithField(string field, string value, bool wildcardSearch) {
+            _fields[field] = value.Split(' ')
+                .Where(k => !String.IsNullOrWhiteSpace(k))
+                .Select(k => wildcardSearch ? (Query)new PrefixQuery(new Term(field, k)) : new TermQuery(new Term(field, k)))
+                .ToArray();
+
+            return this;
+        }
+
+        public ISearchBuilder After(string name, DateTime date) {
+            _after[name] = date;
+            return this;
+        }
+
+        public ISearchBuilder Before(string name, DateTime date) {
+            _before[name] = date;
+            return this;
+        }
+
+        public ISearchBuilder SortBy(string name) {
+            _sort = name;
+            return this;
+        }
+
+        public ISearchBuilder Ascending() {
+            _sortDescending = false;
+            return this;
+        }
+
+        public ISearchBuilder Slice(int skip, int count) {
+            if ( skip < 0 ) {
+                throw new ArgumentException("Skip must be greater than or equal to zero");
+            }
+
+            if ( count <= 0 ) {
+                throw new ArgumentException("Count must be greater than zero");
+            }
+
+            _skip = skip;
+            _count = count;
+
+            return this;
+        }
+
+        private Query CreateQuery() {
+            var query = new BooleanQuery();
+
+            if ( _fields.Keys.Count > 0 ) { // apply specific filters if defined
+                foreach ( var filters in _fields.Values ) {
+                    foreach(var filter in filters)
+                        query.Add(filter, BooleanClause.Occur.SHOULD);
+                }
+            }
+
+            // apply date range filters, if any
+            foreach(string name in _before.Keys.Concat(_after.Keys)) {
+                if ((_before.ContainsKey(name) && _before[name] != DateTime.MaxValue) || (_after.ContainsKey(name) && _after[name] != DateTime.MinValue)) {
+                    var filter = new TermRangeQuery(name,
+                        DateTools.DateToString(_after.ContainsKey(name) ? _after[name] : DateTime.MinValue, DateTools.Resolution.SECOND),
+                        DateTools.DateToString(_before.ContainsKey(name) ? _before[name] : DateTime.MaxValue, DateTools.Resolution.SECOND),
+                        true, true);
+                    query.Add(filter, BooleanClause.Occur.MUST);
+                }
+            }
+
+            if ( query.Clauses().Count == 0 ) { // no criteria defined: match all documents
+                query.Add(new TermRangeQuery("id", "0", "9", true, true), BooleanClause.Occur.SHOULD);
+            }
+
+            Logger.Debug("New search query: {0}", query.ToString());
+            return query;
+        }
+
+        public IEnumerable<ISearchHit> Search() {
+            var query = CreateQuery();
+
+            var searcher = new IndexSearcher(_directory, true);
+
+            try {
+                var sort = String.IsNullOrEmpty(_sort)
+                    ?
Sort.RELEVANCE + : new Sort(new SortField(_sort, CultureInfo.InvariantCulture, _sortDescending)); + var collector = TopFieldCollector.create( + sort, + _count + _skip, + false, + true, + false, + true); + + searcher.Search(query, collector); + + var results = new List(); + + foreach ( var scoreDoc in collector.TopDocs().scoreDocs.Skip(_skip) ) { + results.Add(new DefaultSearchHit(searcher.Doc(scoreDoc.doc), scoreDoc.score)); + } + + Logger.Information("Search results: {0}", results.Count); + return results; + } + finally { + searcher.Close(); + } + + } + + public int Count() { + var query = CreateQuery(); + + var searcher = new IndexSearcher(_directory, true); + try { + var hits = searcher.Search(query, Int16.MaxValue); + Logger.Information("Search results: {0}", hits.scoreDocs.Length); + var length = hits.scoreDocs.Length; + return Math.Min(length - _skip, _count) ; + } + finally { + searcher.Close(); + } + + } + + public ISearchHit Get(int documentId) { + var query = new TermQuery(new Term("id", documentId.ToString())); + + var searcher = new IndexSearcher(_directory, true); + try { + var hits = searcher.Search(query, 1); + Logger.Information("Search results: {0}", hits.scoreDocs.Length); + if ( hits.scoreDocs.Length > 0 ) { + return new DefaultSearchHit(searcher.Doc(hits.scoreDocs[0].doc), hits.scoreDocs[0].score); + } + else { + return null; + } + } + finally { + searcher.Close(); + } + } + + } +} diff --git a/src/Orchard.Web/Core/Indexing/Lucene/DefaultSearchHit.cs b/src/Orchard.Web/Core/Indexing/Lucene/DefaultSearchHit.cs new file mode 100644 index 000000000..e98a0c1bf --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Lucene/DefaultSearchHit.cs @@ -0,0 +1,40 @@ +using Lucene.Net.Documents; +using System.Globalization; +using Lucene.Net.Util; +using Orchard.Indexing; + +namespace Orchard.Core.Indexing.Lucene { + public class DefaultSearchHit : ISearchHit { + private readonly Document _doc; + private readonly float _score; + + public float Score { get { return _score; } } + + public DefaultSearchHit(Document document, float score) { + _doc = document; + _score = score; + } + + public int Id { get { return int.Parse(GetString("id")); } } + + public int GetInt(string name) { + return NumericUtils.PrefixCodedToInt(_doc.GetField(name).StringValue()); + } + + public float GetFloat(string name) { + return float.Parse(_doc.GetField(name).StringValue(), CultureInfo.InvariantCulture); + } + + public bool GetBoolean(string name) { + return bool.Parse(_doc.GetField(name).StringValue()); + } + + public string GetString(string name) { + return _doc.GetField(name).StringValue(); + } + + public System.DateTime GetDateTime(string name) { + return DateTools.StringToDate(_doc.GetField(name).StringValue()); + } + } +} diff --git a/src/Orchard.Web/Core/Indexing/Models/IndexingSettingsRecord.cs b/src/Orchard.Web/Core/Indexing/Models/IndexingSettingsRecord.cs new file mode 100644 index 000000000..28cd06b9d --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Models/IndexingSettingsRecord.cs @@ -0,0 +1,8 @@ +using System; + +namespace Orchard.Core.Indexing.Models { + public class IndexingSettingsRecord { + public virtual int Id { get; set; } + public virtual DateTime? 
LatestIndexingUtc { get; set; } + } +} \ No newline at end of file diff --git a/src/Orchard.Web/Core/Indexing/Models/IndexingTask.cs b/src/Orchard.Web/Core/Indexing/Models/IndexingTask.cs new file mode 100644 index 000000000..8ceabded0 --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Models/IndexingTask.cs @@ -0,0 +1,36 @@ +using System; +using Orchard.ContentManagement; +using Orchard.Tasks.Indexing; + +namespace Orchard.Core.Indexing.Models { + public class IndexingTask : IIndexingTask { + private readonly IContentManager _contentManager; + private readonly IndexingTaskRecord _record; + private ContentItem _item; + private bool _itemInitialized; + + public IndexingTask(IContentManager contentManager, IndexingTaskRecord record) { + // in spite of appearances, this is actually a created class, not IoC, + // but dependencies are passed in for lazy initialization purposes + _contentManager = contentManager; + _record = record; + } + + public DateTime? CreatedUtc { + get { return _record.CreatedUtc; } + } + + public ContentItem ContentItem { + get { + if (!_itemInitialized) { + if (_record.ContentItemRecord != null) { + _item = _contentManager.Get( + _record.ContentItemRecord.Id, VersionOptions.Published); + } + _itemInitialized = true; + } + return _item; + } + } + } +} \ No newline at end of file diff --git a/src/Orchard.Web/Core/Indexing/Models/IndexingTaskRecord.cs b/src/Orchard.Web/Core/Indexing/Models/IndexingTaskRecord.cs new file mode 100644 index 000000000..e0bb9f54b --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Models/IndexingTaskRecord.cs @@ -0,0 +1,10 @@ +using System; +using Orchard.ContentManagement.Records; + +namespace Orchard.Core.Indexing.Models { + public class IndexingTaskRecord { + public virtual int Id { get; set; } + public virtual DateTime? CreatedUtc { get; set; } + public virtual ContentItemRecord ContentItemRecord { get; set; } + } +} diff --git a/src/Orchard.Web/Core/Indexing/Module.txt b/src/Orchard.Web/Core/Indexing/Module.txt new file mode 100644 index 000000000..18b0612fa --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Module.txt @@ -0,0 +1,10 @@ +name: Indexing +antiforgery: enabled +author: The Orchard Team +website: http://orchardproject.net +version: 0.1 +orchardversion: 0.1.2010.0312 +features: + Indexing: + Description: Indexing services based on Lucene. + Category: Core \ No newline at end of file diff --git a/src/Orchard.Web/Core/Indexing/Services/CreateIndexingTaskHandler.cs b/src/Orchard.Web/Core/Indexing/Services/CreateIndexingTaskHandler.cs new file mode 100644 index 000000000..7924afb75 --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Services/CreateIndexingTaskHandler.cs @@ -0,0 +1,30 @@ +using Orchard.ContentManagement.Handlers; +using Orchard.ContentManagement; +using Orchard.Core.Common.Models; +using Orchard.Tasks.Indexing; + +namespace Orchard.Core.Indexing.Services { + /// + /// Intercepts the ContentHandler events to create indexing tasks when a content item + /// is published, and to delete them when the content item is unpublished. 
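+    /// A published item produces an IndexingTaskRecord through IIndexingTaskManager; the
+    /// IndexingTaskExecutor background task later sweeps those records and lets every content
+    /// handler contribute fields to the Lucene document stored in the "search" index (see below).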
+ /// + public class CreateIndexingTaskHandler : ContentHandler { + private readonly IIndexingTaskManager _indexingTaskManager; + + public CreateIndexingTaskHandler(IIndexingTaskManager indexingTaskManager) { + _indexingTaskManager = indexingTaskManager; + + OnPublishing>(CreateIndexingTask); + OnRemoved>(RemoveIndexingTask); + } + + void CreateIndexingTask(PublishContentContext context, ContentPart part) { + _indexingTaskManager.CreateTask(context.ContentItem); + } + + void RemoveIndexingTask(RemoveContentContext context, ContentPart part) { + _indexingTaskManager.DeleteTasks(context.ContentItem); + } + + } +} diff --git a/src/Orchard.Web/Core/Indexing/Services/IndexingTaskExecutor.cs b/src/Orchard.Web/Core/Indexing/Services/IndexingTaskExecutor.cs new file mode 100644 index 000000000..7fa3d8a6c --- /dev/null +++ b/src/Orchard.Web/Core/Indexing/Services/IndexingTaskExecutor.cs @@ -0,0 +1,108 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using JetBrains.Annotations; +using Orchard.ContentManagement; +using Orchard.ContentManagement.Handlers; +using Orchard.Data; +using Orchard.Indexing; +using Orchard.Logging; +using Orchard.Services; +using Orchard.Tasks; +using Orchard.Core.Indexing.Models; + +namespace Orchard.Core.Indexing.Services { + /// + /// Contains the logic which is regularly executed to retrieve index information from multiple content handlers. + /// + [UsedImplicitly] + public class IndexingTaskExecutor : IBackgroundTask { + private readonly IClock _clock; + private readonly IRepository _repository; + private readonly IRepository _settings; + private readonly IEnumerable _handlers; + private IIndexProvider _indexProvider; + private IIndexManager _indexManager; + private readonly IContentManager _contentManager; + private const string SearchIndexName = "search"; + + public IndexingTaskExecutor( + IClock clock, + IRepository repository, + IRepository settings, + IEnumerable handlers, + IIndexManager indexManager, + IContentManager contentManager) { + _clock = clock; + _repository = repository; + _settings = settings; + _indexManager = indexManager; + _handlers = handlers; + _contentManager = contentManager; + Logger = NullLogger.Instance; + } + + public ILogger Logger { get; set; } + + public void Sweep() { + + if(!_indexManager.HasIndexProvider()) { + return; + } + + _indexProvider = _indexManager.GetSearchIndexProvider(); + + // retrieve last processed index time + var settingsRecord = _settings.Table.FirstOrDefault(); + + if (settingsRecord == null) { + _settings.Create(settingsRecord = new IndexingSettingsRecord { LatestIndexingUtc = new DateTime(1980, 1, 1)}); + } + + var lastIndexing = settingsRecord.LatestIndexingUtc; + settingsRecord.LatestIndexingUtc = _clock.UtcNow; + + // retrieved not yet processed tasks + var taskRecords = _repository.Fetch(x => x.CreatedUtc >= lastIndexing) + .ToArray(); + + if (taskRecords.Length == 0) + return; + + Logger.Information("Processing {0} indexing tasks", taskRecords.Length); + + + if(!_indexProvider.Exists(SearchIndexName)) { + _indexProvider.CreateIndex(SearchIndexName); + } + + foreach (var taskRecord in taskRecords) { + + try { + var task = new IndexingTask(_contentManager, taskRecord); + var context = new IndexContentContext { + ContentItem = task.ContentItem, + IndexDocument = _indexProvider.New(task.ContentItem.Id) + }; + + // dispatch to handlers to retrieve index information + foreach (var handler in _handlers) { + handler.Indexing(context); + } + + _indexProvider.Store(SearchIndexName, 
context.IndexDocument);
+
+                    foreach ( var handler in _handlers ) {
+                        handler.Indexed(context);
+                    }
+                }
+                catch (Exception ex) {
+                    Logger.Warning(ex, "Unable to process indexing task #{0}", taskRecord.Id);
+                }
+
+            }
+
+            _settings.Update(settingsRecord);
+        }
+    }
+}
diff --git a/src/Orchard.Web/Core/Indexing/Services/IndexingTaskManager.cs b/src/Orchard.Web/Core/Indexing/Services/IndexingTaskManager.cs
new file mode 100644
index 000000000..aa37a69e7
--- /dev/null
+++ b/src/Orchard.Web/Core/Indexing/Services/IndexingTaskManager.cs
@@ -0,0 +1,101 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using JetBrains.Annotations;
+using Orchard.ContentManagement;
+using Orchard.Data;
+using Orchard.Logging;
+using Orchard.Tasks.Scheduling;
+using Orchard.Utility.Extensions;
+using Orchard.Tasks.Indexing;
+using Orchard.Core.Indexing.Models;
+using Orchard.Services;
+
+namespace Orchard.Core.Indexing.Services {
+    [UsedImplicitly]
+    public class IndexingTaskManager : IIndexingTaskManager {
+        private readonly IContentManager _contentManager;
+        private readonly IRepository<IndexingTaskRecord> _repository;
+        private readonly IRepository<IndexingSettingsRecord> _settings;
+        private readonly IClock _clock;
+
+        public IndexingTaskManager(
+            IContentManager contentManager,
+            IRepository<IndexingTaskRecord> repository,
+            IRepository<IndexingSettingsRecord> settings,
+            IClock clock) {
+            _clock = clock;
+            _repository = repository;
+            _settings = settings;
+            _contentManager = contentManager;
+            Logger = NullLogger.Instance;
+        }
+
+        public ILogger Logger { get; set; }
+
+        public void CreateTask(ContentItem contentItem) {
+            if (contentItem == null) {
+                throw new ArgumentNullException("contentItem");
+            }
+
+            // remove previous tasks for the same content item
+            var tasks = _repository
+                .Fetch(x => x.ContentItemRecord.Id == contentItem.Id)
+                .ToArray();
+
+            foreach (var task in tasks) {
+                _repository.Delete(task);
+            }
+
+            var taskRecord = new IndexingTaskRecord {
+                CreatedUtc = _clock.UtcNow,
+                ContentItemRecord = contentItem.Record
+            };
+
+            _repository.Create(taskRecord);
+
+            Logger.Information("Indexing task created for [{0}:{1}]", contentItem.ContentType, contentItem.Id);
+        }
+
+        public IEnumerable<IIndexingTask> GetTasks(DateTime? createdAfter) {
+            return _repository
+                .Fetch(x => x.CreatedUtc > createdAfter)
+                .Select(x => new IndexingTask(_contentManager, x))
+                .Cast<IIndexingTask>()
+                .ToReadOnlyCollection();
+        }
+
+        public void DeleteTasks(DateTime?
createdBefore) { + Logger.Debug("Deleting Indexing tasks created before {0}", createdBefore); + + var tasks = _repository + .Fetch(x => x.CreatedUtc <= createdBefore); + + foreach (var task in tasks) { + _repository.Delete(task); + } + } + + public void DeleteTasks(ContentItem contentItem) { + Logger.Debug("Deleting Indexing tasks for ContentItem [{0}:{1}]", contentItem.ContentType, contentItem.Id); + + var tasks = _repository + .Fetch(x => x.Id == contentItem.Id); + + foreach (var task in tasks) { + _repository.Delete(task); + } + } + + public void RebuildIndex() { + var settingsRecord = _settings.Table.FirstOrDefault(); + if (settingsRecord == null) { + _settings.Create(settingsRecord = new IndexingSettingsRecord() ); + } + + settingsRecord.LatestIndexingUtc = new DateTime(1980, 1, 1); + _settings.Update(settingsRecord); + } + + } +} diff --git a/src/Orchard.Web/Core/Orchard.Core.csproj b/src/Orchard.Web/Core/Orchard.Core.csproj index 29108a326..42f35ff19 100644 --- a/src/Orchard.Web/Core/Orchard.Core.csproj +++ b/src/Orchard.Web/Core/Orchard.Core.csproj @@ -39,6 +39,10 @@ AllRules.ruleset + + False + ..\..\..\lib\lucene.net\Lucene.Net.dll + 3.5 @@ -105,6 +109,16 @@ + + + + + + + + + + @@ -159,6 +173,7 @@ + diff --git a/src/Orchard.Web/Modules/Orchard.Setup/Services/SetupService.cs b/src/Orchard.Web/Modules/Orchard.Setup/Services/SetupService.cs index 60b4d54f0..686d3bb50 100644 --- a/src/Orchard.Web/Modules/Orchard.Setup/Services/SetupService.cs +++ b/src/Orchard.Web/Modules/Orchard.Setup/Services/SetupService.cs @@ -60,6 +60,7 @@ namespace Orchard.Setup.Services { "HomePage", "Navigation", "Scheduling", + "Indexing", "Settings", "XmlRpc", "Orchard.Users", diff --git a/src/Orchard/ContentManagement/Drivers/ContentItemDriverHandler.cs b/src/Orchard/ContentManagement/Drivers/ContentItemDriverHandler.cs index 2290af600..ca2cbd200 100644 --- a/src/Orchard/ContentManagement/Drivers/ContentItemDriverHandler.cs +++ b/src/Orchard/ContentManagement/Drivers/ContentItemDriverHandler.cs @@ -33,6 +33,8 @@ namespace Orchard.ContentManagement.Drivers { void IContentHandler.Published(PublishContentContext context) { } void IContentHandler.Removing(RemoveContentContext context) { } void IContentHandler.Removed(RemoveContentContext context) { } + void IContentHandler.Indexing(IndexContentContext context) { } + void IContentHandler.Indexed(IndexContentContext context) { } void IContentHandler.GetContentItemMetadata(GetContentItemMetadataContext context) { diff --git a/src/Orchard/ContentManagement/Drivers/ContentPartDriverHandler.cs b/src/Orchard/ContentManagement/Drivers/ContentPartDriverHandler.cs index 8876386f4..3a6f67da7 100644 --- a/src/Orchard/ContentManagement/Drivers/ContentPartDriverHandler.cs +++ b/src/Orchard/ContentManagement/Drivers/ContentPartDriverHandler.cs @@ -32,6 +32,8 @@ namespace Orchard.ContentManagement.Drivers { void IContentHandler.Published(PublishContentContext context) { } void IContentHandler.Removing(RemoveContentContext context) { } void IContentHandler.Removed(RemoveContentContext context) { } + void IContentHandler.Indexing(IndexContentContext context) { } + void IContentHandler.Indexed(IndexContentContext context) { } void IContentHandler.GetContentItemMetadata(GetContentItemMetadataContext context) { } diff --git a/src/Orchard/ContentManagement/Handlers/ContentHandler.cs b/src/Orchard/ContentManagement/Handlers/ContentHandler.cs index d8227a5da..de68ae0b8 100644 --- a/src/Orchard/ContentManagement/Handlers/ContentHandler.cs +++ 
b/src/Orchard/ContentManagement/Handlers/ContentHandler.cs @@ -57,6 +57,14 @@ namespace Orchard.ContentManagement.Handlers { Filters.Add(new InlineStorageFilter { OnRemoved = handler }); } + protected void OnIndexing(Action handler) where TPart : class, IContent { + Filters.Add(new InlineStorageFilter { OnIndexing = handler }); + } + + protected void OnIndexed(Action handler) where TPart : class, IContent { + Filters.Add(new InlineStorageFilter { OnIndexed = handler }); + } + protected void OnGetContentItemMetadata(Action handler) where TPart : class, IContent { Filters.Add(new InlineTemplateFilter { OnGetItemMetadata = handler }); } @@ -84,6 +92,8 @@ namespace Orchard.ContentManagement.Handlers { public Action OnPublished { get; set; } public Action OnRemoving { get; set; } public Action OnRemoved { get; set; } + public Action OnIndexing { get; set; } + public Action OnIndexed { get; set; } protected override void Activated(ActivatedContentContext context, TPart instance) { if (OnActivated != null) OnActivated(context, instance); } @@ -117,6 +127,15 @@ namespace Orchard.ContentManagement.Handlers { protected override void Removed(RemoveContentContext context, TPart instance) { if (OnRemoved != null) OnRemoved(context, instance); } + protected override void Indexing(IndexContentContext context, TPart instance) { + if ( OnIndexing != null ) + OnIndexing(context, instance); + } + protected override void Indexed(IndexContentContext context, TPart instance) { + if ( OnIndexed != null ) + OnIndexed(context, instance); + } + } class InlineTemplateFilter : TemplateFilterBase where TPart : class, IContent { @@ -214,6 +233,17 @@ namespace Orchard.ContentManagement.Handlers { Removed(context); } + void IContentHandler.Indexing(IndexContentContext context) { + foreach ( var filter in Filters.OfType() ) + filter.Indexing(context); + Indexing(context); + } + + void IContentHandler.Indexed(IndexContentContext context) { + foreach ( var filter in Filters.OfType() ) + filter.Indexed(context); + Indexing(context); + } void IContentHandler.GetContentItemMetadata(GetContentItemMetadataContext context) { foreach (var filter in Filters.OfType()) @@ -254,6 +284,9 @@ namespace Orchard.ContentManagement.Handlers { protected virtual void Removing(RemoveContentContext context) { } protected virtual void Removed(RemoveContentContext context) { } + protected virtual void Indexing(IndexContentContext context) { } + protected virtual void Indexed(IndexContentContext context) { } + protected virtual void GetItemMetadata(GetContentItemMetadataContext context) { } protected virtual void BuildDisplayModel(BuildDisplayModelContext context) { } protected virtual void BuildEditorModel(BuildEditorModelContext context) { } diff --git a/src/Orchard/ContentManagement/Handlers/IContentHandler.cs b/src/Orchard/ContentManagement/Handlers/IContentHandler.cs index 057165c3c..846e6a1fd 100644 --- a/src/Orchard/ContentManagement/Handlers/IContentHandler.cs +++ b/src/Orchard/ContentManagement/Handlers/IContentHandler.cs @@ -17,6 +17,8 @@ namespace Orchard.ContentManagement.Handlers { void Published(PublishContentContext context); void Removing(RemoveContentContext context); void Removed(RemoveContentContext context); + void Indexing(IndexContentContext context); + void Indexed(IndexContentContext context); void GetContentItemMetadata(GetContentItemMetadataContext context); void BuildDisplayModel(BuildDisplayModelContext context); diff --git a/src/Orchard/ContentManagement/Handlers/IContentStorageFilter.cs 
b/src/Orchard/ContentManagement/Handlers/IContentStorageFilter.cs index 6d9f840b5..a3bbe7b40 100644 --- a/src/Orchard/ContentManagement/Handlers/IContentStorageFilter.cs +++ b/src/Orchard/ContentManagement/Handlers/IContentStorageFilter.cs @@ -11,5 +11,7 @@ namespace Orchard.ContentManagement.Handlers { void Published(PublishContentContext context); void Removing(RemoveContentContext context); void Removed(RemoveContentContext context); + void Indexing(IndexContentContext context); + void Indexed(IndexContentContext context); } } diff --git a/src/Orchard/ContentManagement/Handlers/IndexContentContext.cs b/src/Orchard/ContentManagement/Handlers/IndexContentContext.cs new file mode 100644 index 000000000..4e5c8f443 --- /dev/null +++ b/src/Orchard/ContentManagement/Handlers/IndexContentContext.cs @@ -0,0 +1,8 @@ +using Orchard.Indexing; + +namespace Orchard.ContentManagement.Handlers { + public class IndexContentContext { + public ContentItem ContentItem { get; set; } + public IIndexDocument IndexDocument { get; set; } + } +} diff --git a/src/Orchard/ContentManagement/Handlers/StorageFilterBase.cs b/src/Orchard/ContentManagement/Handlers/StorageFilterBase.cs index a7158f7b0..d63e0e4a7 100644 --- a/src/Orchard/ContentManagement/Handlers/StorageFilterBase.cs +++ b/src/Orchard/ContentManagement/Handlers/StorageFilterBase.cs @@ -12,6 +12,8 @@ namespace Orchard.ContentManagement.Handlers { protected virtual void Published(PublishContentContext context, TPart instance) { } protected virtual void Removing(RemoveContentContext context, TPart instance) { } protected virtual void Removed(RemoveContentContext context, TPart instance) { } + protected virtual void Indexing(IndexContentContext context, TPart instance) { } + protected virtual void Indexed(IndexContentContext context, TPart instance) { } void IContentStorageFilter.Activated(ActivatedContentContext context) { @@ -68,5 +70,16 @@ namespace Orchard.ContentManagement.Handlers { if (context.ContentItem.Is()) Removed(context, context.ContentItem.As()); } + + void IContentStorageFilter.Indexing(IndexContentContext context) { + if ( context.ContentItem.Is() ) + Indexing(context, context.ContentItem.As()); + } + + void IContentStorageFilter.Indexed(IndexContentContext context) { + if ( context.ContentItem.Is() ) + Indexed(context, context.ContentItem.As()); + } + } } diff --git a/src/Orchard/Environment/OrchardStarter.cs b/src/Orchard/Environment/OrchardStarter.cs index c1cec7593..f8e6ec8f4 100644 --- a/src/Orchard/Environment/OrchardStarter.cs +++ b/src/Orchard/Environment/OrchardStarter.cs @@ -4,7 +4,6 @@ using System.IO; using System.Web.Hosting; using Autofac; using Autofac.Configuration; -using Autofac.Integration.Web; using Orchard.Caching; using Orchard.Environment.AutofacUtil; using Orchard.Environment.Configuration; diff --git a/src/Orchard/Indexing/DefaultIndexManager.cs b/src/Orchard/Indexing/DefaultIndexManager.cs new file mode 100644 index 000000000..b5aa3047f --- /dev/null +++ b/src/Orchard/Indexing/DefaultIndexManager.cs @@ -0,0 +1,25 @@ +using System.Collections.Generic; +using System.Linq; + +namespace Orchard.Indexing { + public class DefaultIndexManager : IIndexManager { + + private readonly IEnumerable _indexProviders; + + public DefaultIndexManager(IEnumerable indexProviders) { + _indexProviders = indexProviders; + } + + #region IIndexManager Members + + public bool HasIndexProvider() { + return _indexProviders.AsQueryable().Count() > 0; + } + + public IIndexProvider GetSearchIndexProvider() { + return 
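+            // The first registered IIndexProvider wins; with this changeset that is the
+            // Lucene-based DefaultIndexProvider from Orchard.Core.Indexing. A typical caller
+            // (illustrative sketch only; field and term values are assumed):
+            //
+            //     if (_indexManager.HasIndexProvider()) {
+            //         var hits = _indexManager.GetSearchIndexProvider()
+            //             .CreateSearchBuilder("search")
+            //             .WithField("body", "orchard")
+            //             .Slice(0, 10)
+            //             .Search();
+            //     }
+            //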
_indexProviders.AsQueryable().FirstOrDefault(); + } + + #endregion + } +} \ No newline at end of file diff --git a/src/Orchard/Indexing/IIndexDocument.cs b/src/Orchard/Indexing/IIndexDocument.cs new file mode 100644 index 000000000..5cf9d4d6f --- /dev/null +++ b/src/Orchard/Indexing/IIndexDocument.cs @@ -0,0 +1,27 @@ +using System; +using System.Collections.Generic; + +namespace Orchard.Indexing { + + public interface IIndexDocument { + + IIndexDocument SetContentItemId(int documentId); + + IIndexDocument Add(string name, string value); + IIndexDocument Add(string name, DateTime value); + IIndexDocument Add(string name, int value); + IIndexDocument Add(string name, bool value); + IIndexDocument Add(string name, float value); + + /// + /// Whether to store the original value to the index + /// + IIndexDocument Store(bool store); + + /// + /// Whether the content should be tokenized or not. If not, value will be taken as a whole + /// + IIndexDocument Analyze(bool analyze); + + } +} \ No newline at end of file diff --git a/src/Orchard/Indexing/IIndexManager.cs b/src/Orchard/Indexing/IIndexManager.cs new file mode 100644 index 000000000..bee4dfabf --- /dev/null +++ b/src/Orchard/Indexing/IIndexManager.cs @@ -0,0 +1,7 @@ +namespace Orchard.Indexing { + public interface IIndexManager : IDependency { + + bool HasIndexProvider(); + IIndexProvider GetSearchIndexProvider(); + } +} \ No newline at end of file diff --git a/src/Orchard/Indexing/IIndexProvider.cs b/src/Orchard/Indexing/IIndexProvider.cs new file mode 100644 index 000000000..dcd44c06a --- /dev/null +++ b/src/Orchard/Indexing/IIndexProvider.cs @@ -0,0 +1,45 @@ +namespace Orchard.Indexing { + public interface IIndexProvider : IDependency { + /// + /// Creates a new index + /// + void CreateIndex(string name); + + /// + /// Checks whether an index is already existing or not + /// + bool Exists(string name); + + /// + /// Deletes an existing index + /// + void DeleteIndex(string name); + + /// + /// Loads an existing document + /// + IIndexDocument Get(string indexName, int documentId); + + /// + /// Creates an empty document + /// + /// + IIndexDocument New(int documentId); + + /// + /// Adds a new document to the index + /// + void Store(string indexName, IIndexDocument indexDocument); + + /// + /// Removes an existing document from the index + /// + void Delete(string indexName, int id); + + /// + /// Creates a search builder for this provider + /// + /// A search builder instance + ISearchBuilder CreateSearchBuilder(string indexName); + } +} \ No newline at end of file diff --git a/src/Orchard/Indexing/ISearchBuilder.cs b/src/Orchard/Indexing/ISearchBuilder.cs new file mode 100644 index 000000000..3994a37bd --- /dev/null +++ b/src/Orchard/Indexing/ISearchBuilder.cs @@ -0,0 +1,24 @@ +using System; +using System.Collections.Generic; + +namespace Orchard.Indexing { + public interface ISearchBuilder { + + ISearchBuilder Parse(string query); + + ISearchBuilder WithField(string field, string value); + ISearchBuilder WithField(string field, string value, bool wildcardSearch); + + ISearchBuilder After(string name, DateTime date); + ISearchBuilder Before(string name, DateTime date); + ISearchBuilder SortBy(string name); + ISearchBuilder Ascending(); + + ISearchBuilder Slice(int skip, int count); + IEnumerable Search(); + ISearchHit Get(int documentId); + int Count(); + + + } +} diff --git a/src/Orchard/Indexing/ISearchHit.cs b/src/Orchard/Indexing/ISearchHit.cs new file mode 100644 index 000000000..0382e3d0e --- /dev/null +++ 
b/src/Orchard/Indexing/ISearchHit.cs @@ -0,0 +1,13 @@ +using System; +namespace Orchard.Indexing { + public interface ISearchHit { + int Id { get; } + float Score { get; } + + int GetInt(string name); + float GetFloat(string name); + bool GetBoolean(string name); + string GetString(string name); + DateTime GetDateTime(string name); + } +} diff --git a/src/Orchard/Orchard.Framework.csproj b/src/Orchard/Orchard.Framework.csproj index 810cff1aa..f43d1f4af 100644 --- a/src/Orchard/Orchard.Framework.csproj +++ b/src/Orchard/Orchard.Framework.csproj @@ -145,6 +145,8 @@ + + @@ -163,6 +165,13 @@ + + + + + + + diff --git a/src/Orchard/Tasks/Indexing/IIndexingTask.cs b/src/Orchard/Tasks/Indexing/IIndexingTask.cs new file mode 100644 index 000000000..f53bb1067 --- /dev/null +++ b/src/Orchard/Tasks/Indexing/IIndexingTask.cs @@ -0,0 +1,9 @@ +using System; +using Orchard.ContentManagement; + +namespace Orchard.Tasks.Indexing { + public interface IIndexingTask { + ContentItem ContentItem { get; } + DateTime? CreatedUtc { get; } + } +} diff --git a/src/Orchard/Tasks/Indexing/IIndexingTaskManager.cs b/src/Orchard/Tasks/Indexing/IIndexingTaskManager.cs new file mode 100644 index 000000000..de913e167 --- /dev/null +++ b/src/Orchard/Tasks/Indexing/IIndexingTaskManager.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using Orchard.ContentManagement; + +namespace Orchard.Tasks.Indexing { + public interface IIndexingTaskManager : IDependency { + void CreateTask(ContentItem contentItem); + IEnumerable GetTasks(DateTime? createdAfter); + void DeleteTasks(DateTime? createdBefore); + void DeleteTasks(ContentItem contentItem); + } +} \ No newline at end of file diff --git a/src/Orchard/Tasks/SweepGenerator.cs b/src/Orchard/Tasks/SweepGenerator.cs index 6612f4181..b8fb5b64b 100644 --- a/src/Orchard/Tasks/SweepGenerator.cs +++ b/src/Orchard/Tasks/SweepGenerator.cs @@ -7,7 +7,7 @@ using Orchard.Logging; namespace Orchard.Tasks { public class SweepGenerator : IOrchardShellEvents { private readonly IContainer _container; - private Timer _timer; + private readonly Timer _timer; public SweepGenerator(IContainer container) { _container = container;
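            // Illustrative sketch (part and property names such as BodyAspect/Text are assumed):
            // IndexingTaskExecutor implements IBackgroundTask, so its Sweep() is expected to be
            // driven by the SweepGenerator timer shown here; during a sweep, a part handler can
            // use the new OnIndexing hook to add fields to the document being indexed:
            //
            //     public class BodyPartHandler : ContentHandler {
            //         public BodyPartHandler() {
            //             OnIndexing<BodyAspect>((context, part) =>
            //                 context.IndexDocument.Add("body", part.Text).Analyze(true));
            //         }
            //     }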