Changed querying behavior to handle special characters (like C#)

--HG--
branch : dev
This commit is contained in:
Sebastien Ros
2010-06-07 12:26:32 -07:00
parent 887439352d
commit 11c5ff1a42
3 changed files with 43 additions and 2 deletions

View File

@@ -177,5 +177,19 @@ namespace Orchard.Tests.Indexing {
Assert.That(date[0].GetDateTime("date") < date[1].GetDateTime("date"), Is.True);
Assert.That(date[1].GetDateTime("date") < date[2].GetDateTime("date"), Is.True);
}
/// <summary>
/// Searching for values that contain punctuation (such as "C#" or "C++") must not fail,
/// and must return the documents whose analyzed text matches the analyzed search value.
/// </summary>
[Test]
public void ShouldEscapeSpecialChars() {
    const string indexName = "default";
    const string fieldName = "body";

    _provider.CreateIndex(indexName);

    var orchardDocument = _provider.New(1).Add(fieldName, "Orchard has been developped in C#");
    var windowsDocument = _provider.New(2).Add(fieldName, "Windows has been developped in C++");
    _provider.Store(indexName, orchardDocument);
    _provider.Store(indexName, windowsDocument);

    // Both documents are expected for each query: the analyzer removes punctuation,
    // so "C#" and "C++" presumably reduce to the same token once analyzed.
    var csharpHits = _searchBuilder.WithField(fieldName, "C#").Search().ToList();
    Assert.That(csharpHits.Count, Is.EqualTo(2));

    var cppHits = _searchBuilder.WithField(fieldName, "C++").Search().ToList();
    Assert.That(cppHits.Count, Is.EqualTo(2));
}
}
}

View File

@@ -23,7 +23,7 @@ namespace Orchard.Core.Indexing.Lucene {
private readonly IAppDataFolder _appDataFolder;
private readonly ShellSettings _shellSettings;
public static readonly Version LuceneVersion = Version.LUCENE_29;
private readonly Analyzer _analyzer = new StandardAnalyzer(LuceneVersion);
private readonly Analyzer _analyzer ;
private readonly string _basePath;
public static readonly DateTime DefaultMinDateTime = new DateTime(1980, 1, 1);
public static readonly string Settings = "Settings";
@@ -34,6 +34,7 @@ namespace Orchard.Core.Indexing.Lucene {
public DefaultIndexProvider(IAppDataFolder appDataFolder, ShellSettings shellSettings) {
_appDataFolder = appDataFolder;
_shellSettings = shellSettings;
_analyzer = CreateAnalyzer();
// TODO: (sebros) Find a common way to get where tenant's specific files should go. "Sites/Tenant" is hard coded in multiple places
_basePath = Path.Combine("Sites", _shellSettings.Name, "Indexes");
@@ -44,6 +45,11 @@ namespace Orchard.Core.Indexing.Lucene {
EnsureDirectoryExists();
}
/// <summary>
/// Creates the analyzer used by the index provider. Exposed as a static factory so that
/// other components (for example the search builder) can obtain an identically
/// configured analyzer.
/// </summary>
/// <returns>A new <see cref="StandardAnalyzer"/> configured for <see cref="LuceneVersion"/>.</returns>
public static Analyzer CreateAnalyzer() {
    // StandardAnalyzer lower-cases the text, filters out stop words and removes punctuation.
    Analyzer analyzer = new StandardAnalyzer(LuceneVersion);
    return analyzer;
}
private void EnsureDirectoryExists() {
var directory = new DirectoryInfo(_appDataFolder.MapPath(_basePath));
if(!directory.Exists) {

View File

@@ -2,12 +2,15 @@
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Orchard.Logging;
using Lucene.Net.Documents;
using Orchard.Indexing;
using Lucene.Net.QueryParsers;
namespace Orchard.Core.Indexing.Lucene {
public class DefaultSearchBuilder : ISearchBuilder {
@@ -23,6 +26,8 @@ namespace Orchard.Core.Indexing.Lucene {
private readonly Dictionary<string, DateTime> _after;
private string _sort;
private bool _sortDescending;
private string _parse;
private readonly Analyzer _analyzer;
public ILogger Logger { get; set; }
@@ -37,9 +42,12 @@ namespace Orchard.Core.Indexing.Lucene {
_fields = new Dictionary<string, Query[]>();
_sort = String.Empty;
_sortDescending = true;
_parse = String.Empty;
_analyzer = DefaultIndexProvider.CreateAnalyzer();
}
/// <summary>
/// Records a raw query string. When it is not null or white space, CreateQuery hands it
/// to the Lucene QueryParser.
/// </summary>
/// <param name="query">The query text, in query parser syntax.</param>
/// <returns>This builder, to allow call chaining.</returns>
public ISearchBuilder Parse(string query) {
    // Nothing is parsed here; the text is only stored until the query is created.
    this._parse = query;
    return this;
}
@@ -49,8 +57,17 @@ namespace Orchard.Core.Indexing.Lucene {
public ISearchBuilder WithField(string field, string value, bool wildcardSearch) {
_fields[field] = value.Split(' ')
// Tokenize the search value with the analyzer obtained from DefaultIndexProvider.CreateAnalyzer()
// so that the query terms are normalized (lower-casing, punctuation removal) in the same
// way as that analyzer normalizes text.
var tokens = new List<string>();
using(var sr = new System.IO.StringReader(value)) {
    var stream = _analyzer.TokenStream(field, sr);
    // The attribute instance is reused for every token of the stream, so it is looked up
    // once instead of on each iteration.
    var termAttribute = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    while(stream.IncrementToken()) {
        tokens.Add(termAttribute.Term());
    }
}

// One query per token. Both kinds of query must target the requested field:
// Term(string) alone would create a term whose *field* is the token and whose text is empty.
// NOTE(review): QueryParser.Escape is meant for text handed to QueryParser; terms built
// directly are matched literally, so the added backslashes may prevent matches for tokens
// that still contain special characters - confirm whether escaping is wanted here.
_fields[field] = tokens
    .Where(k => !String.IsNullOrWhiteSpace(k))
    .Select(QueryParser.Escape)
    .Select(k => wildcardSearch ? (Query)new PrefixQuery(new Term(field, k)) : new TermQuery(new Term(field, k)))
    .ToArray();
@@ -93,6 +110,10 @@ namespace Orchard.Core.Indexing.Lucene {
}
private Query CreateQuery() {
if(!String.IsNullOrWhiteSpace(_parse)) {
return new QueryParser(DefaultIndexProvider.LuceneVersion, "body", DefaultIndexProvider.CreateAnalyzer()).Parse(_parse);
}
var query = new BooleanQuery();
if ( _fields.Keys.Count > 0 ) { // apply specific filters if defined