Excluded HTML tags from indexed body content

--HG--
branch : dev
This commit is contained in:
Sebastien Ros
2010-06-02 17:41:56 -07:00
parent 923621bbbf
commit 54c57ac99d
6 changed files with 36 additions and 6 deletions

View File

@@ -155,7 +155,18 @@ namespace Orchard.Tests.Indexing {
Assert.That(searchBuilder.Get(1).Id, Is.EqualTo(1)); Assert.That(searchBuilder.Get(1).Id, Is.EqualTo(1));
Assert.That(searchBuilder.Get(11).Id, Is.EqualTo(11)); Assert.That(searchBuilder.Get(11).Id, Is.EqualTo(11));
Assert.That(searchBuilder.Get(111).Id, Is.EqualTo(111)); Assert.That(searchBuilder.Get(111).Id, Is.EqualTo(111));
}
[Test]
public void TagsShouldBeRemoved() {
_provider.CreateIndex("default");
_provider.Store("default", _provider.New(1).Add("body", "<hr>some content</hr>"));
_provider.Store("default", _provider.New(2).Add("body", "<hr>some content</hr>", true));
var searchBuilder = _provider.CreateSearchBuilder("default");
Assert.That(searchBuilder.WithField("body", "hr").Search().Count(), Is.EqualTo(1));
Assert.That(searchBuilder.WithField("body", "hr").Search().First().Id, Is.EqualTo(1));
} }
} }
} }

View File

@@ -10,7 +10,7 @@ namespace Orchard.Core.Common.Handlers {
Filters.Add(StorageFilter.For(bodyRepository)); Filters.Add(StorageFilter.For(bodyRepository));
OnIndexing<BodyAspect>((context, bodyAspect) => context.IndexDocument OnIndexing<BodyAspect>((context, bodyAspect) => context.IndexDocument
.Add("body", bodyAspect.Record.Text).Store(false) .Add("body", bodyAspect.Record.Text, true).Store(false)
.Add("format", bodyAspect.Record.Format).Analyze(false)); .Add("format", bodyAspect.Record.Format).Analyze(false));
} }
} }

View File

@@ -1,7 +1,10 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Web.Mvc;
using Lucene.Net.Documents; using Lucene.Net.Documents;
using Orchard.Indexing; using Orchard.Indexing;
using Orchard.Mvc.Html;
using Orchard.Utility.Extensions;
namespace Orchard.Core.Indexing.Lucene { namespace Orchard.Core.Indexing.Lucene {
@@ -17,7 +20,15 @@ namespace Orchard.Core.Indexing.Lucene {
} }
public IIndexDocument Add(string name, string value) { public IIndexDocument Add(string name, string value) {
return Add(name, value, false);
}
public IIndexDocument Add(string name, string value, bool removeTags) {
AppendPreviousField(); AppendPreviousField();
if(removeTags) {
value = value.RemoveTags();
}
_previousField = new Field(name, value, Field.Store.YES, Field.Index.ANALYZED); _previousField = new Field(name, value, Field.Store.YES, Field.Index.ANALYZED);
return this; return this;
} }
@@ -27,6 +38,7 @@ namespace Orchard.Core.Indexing.Lucene {
_previousField = new Field(name, DateTools.DateToString(value, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED); _previousField = new Field(name, DateTools.DateToString(value, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED);
return this; return this;
} }
public IIndexDocument Add(string name, int value) { public IIndexDocument Add(string name, int value) {
AppendPreviousField(); AppendPreviousField();
_previousField = new NumericField(name, Field.Store.YES, true).SetIntValue(value); _previousField = new NumericField(name, Field.Store.YES, true).SetIntValue(value);

View File

@@ -8,20 +8,22 @@ namespace Orchard.Indexing {
IIndexDocument SetContentItemId(int documentId); IIndexDocument SetContentItemId(int documentId);
IIndexDocument Add(string name, string value); IIndexDocument Add(string name, string value);
IIndexDocument Add(string name, string value, bool removeTags);
IIndexDocument Add(string name, DateTime value); IIndexDocument Add(string name, DateTime value);
IIndexDocument Add(string name, int value); IIndexDocument Add(string name, int value);
IIndexDocument Add(string name, bool value); IIndexDocument Add(string name, bool value);
IIndexDocument Add(string name, float value); IIndexDocument Add(string name, float value);
/// <summary> /// <summary>
/// Whether to store the original value to the index /// Whether to store the original value to the index.
/// </summary> /// </summary>
IIndexDocument Store(bool store); IIndexDocument Store(bool store);
/// <summary> /// <summary>
/// Whether the content should be tokenized or not. If not, value will be taken as a whole /// Whether the content should be tokenized or not. If not, value will be taken as a whole.
/// </summary> /// </summary>
IIndexDocument Analyze(bool analyze); IIndexDocument Analyze(bool analyze);
} }
} }

View File

@@ -94,10 +94,8 @@ namespace Orchard.Mvc.Html {
#region Excerpt #region Excerpt
public static MvcHtmlString Excerpt(this HtmlHelper html, string markup, int length) { public static MvcHtmlString Excerpt(this HtmlHelper html, string markup, int length) {
var tagRegex = new Regex("<[^<>]*>", RegexOptions.Singleline);
var text = tagRegex.Replace(markup, "");
return MvcHtmlString.Create(text.Ellipsize(length)); return MvcHtmlString.Create(markup.RemoveTags().Ellipsize(length));
} }
#endregion #endregion

View File

@@ -29,5 +29,12 @@ namespace Orchard.Utility.Extensions {
? defaultValue ? defaultValue
: text; : text;
} }
public static string RemoveTags(this string html) {
var tagRegex = new Regex("<[^<>]*>", RegexOptions.Singleline);
var text = tagRegex.Replace(html, "");
return text;
}
} }
} }