Implementing revised indexing loop design

--HG--
branch : indexing
This commit is contained in:
Sebastien Ros
2011-03-04 11:05:19 -08:00
parent 32efbc19cc
commit 6d3dffd77e
15 changed files with 306 additions and 275 deletions

View File

@@ -5,13 +5,14 @@ namespace Orchard.Indexing.Services {
public class IndexEntry {
public string IndexName { get; set; }
public int DocumentCount { get; set; }
public DateTime? LastUpdateUtc { get; set; }
public DateTime LastUpdateUtc { get; set; }
public IEnumerable<string> Fields { get; set; }
public IndexingStatus IndexingStatus { get; set; }
}
public interface IIndexingService : IDependency {
void RebuildIndex();
void UpdateIndex();
IndexEntry GetIndexEntry();
void RebuildIndex(string indexName);
void UpdateIndex(string indexName);
IndexEntry GetIndexEntry(string indexName);
}
}

View File

@@ -0,0 +1,13 @@
using System;
namespace Orchard.Indexing.Services {
/// <summary>
/// Describes what the indexing loop is currently doing.
/// </summary>
public enum IndexingStatus {
/// <summary>The index is being rebuilt (published content items are being re-indexed).</summary>
Rebuilding,
/// <summary>Pending indexing tasks are being applied to the index.</summary>
Updating,
/// <summary>No indexing work is in progress.</summary>
Idle
}
/// <summary>
/// Exposes statistics about an index: when it was last indexed and
/// what the indexing loop is currently doing.
/// </summary>
public interface IIndexStatisticsProvider : IDependency {
/// <summary>
/// Gets the UTC date and time recorded for the last indexing pass of the index.
/// </summary>
/// <param name="indexName">Name of the index to query.</param>
DateTime GetLastIndexedUtc(string indexName);
/// <summary>
/// Gets the current indexing status.
/// </summary>
/// <param name="indexName">Name of the index to query.</param>
IndexingStatus GetIndexingStatus(string indexName);
}
}

View File

@@ -1,59 +1,65 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using Orchard.Localization;
using Orchard.Localization.Services;
using Orchard.UI.Notify;
namespace Orchard.Indexing.Services
{
public class IndexingService : IIndexingService {
private const string SearchIndexName = "Search";
private readonly IIndexManager _indexManager;
private readonly IEnumerable<IIndexNotifierHandler> _indexNotifierHandlers;
private readonly IIndexStatisticsProvider _indexStatisticsProvider;
public IndexingService(IOrchardServices services, IIndexManager indexManager, IEnumerable<IIndexNotifierHandler> indexNotifierHandlers, ICultureManager cultureManager) {
public IndexingService(
IOrchardServices services,
IIndexManager indexManager,
IEnumerable<IIndexNotifierHandler> indexNotifierHandlers,
IIndexStatisticsProvider indexStatisticsProvider) {
Services = services;
_indexManager = indexManager;
_indexNotifierHandlers = indexNotifierHandlers;
_indexStatisticsProvider = indexStatisticsProvider;
T = NullLocalizer.Instance;
}
public IOrchardServices Services { get; set; }
public Localizer T { get; set; }
void IIndexingService.RebuildIndex() {
void IIndexingService.RebuildIndex(string indexName) {
if (!_indexManager.HasIndexProvider()) {
Services.Notifier.Warning(T("There is no search index to rebuild."));
return;
}
var searchProvider = _indexManager.GetSearchIndexProvider();
if (searchProvider.Exists(SearchIndexName))
searchProvider.DeleteIndex(SearchIndexName);
if (searchProvider.Exists(indexName))
searchProvider.DeleteIndex(indexName);
searchProvider.CreateIndex(SearchIndexName); // or just reset the updated date and let the background process recreate the index
searchProvider.CreateIndex(indexName); // or just reset the updated date and let the background process recreate the index
Services.Notifier.Information(T("The search index has been rebuilt."));
Services.Notifier.Information(T("The index {0} has been rebuilt.", indexName));
}
void IIndexingService.UpdateIndex() {
void IIndexingService.UpdateIndex(string indexName) {
foreach(var handler in _indexNotifierHandlers) {
handler.UpdateIndex(SearchIndexName);
handler.UpdateIndex(indexName);
}
Services.Notifier.Information(T("The search index has been updated."));
}
IndexEntry IIndexingService.GetIndexEntry() {
IndexEntry IIndexingService.GetIndexEntry(string indexName) {
var provider = _indexManager.GetSearchIndexProvider();
if (provider == null)
return null;
return new IndexEntry {
IndexName = SearchIndexName,
DocumentCount = provider.NumDocs(SearchIndexName),
Fields = provider.GetFields(SearchIndexName),
LastUpdateUtc = provider.GetLastIndexUtc(SearchIndexName)
IndexName = indexName,
DocumentCount = provider.NumDocs(indexName),
Fields = provider.GetFields(indexName),
LastUpdateUtc = _indexStatisticsProvider.GetLastIndexedUtc(indexName),
IndexingStatus = _indexStatisticsProvider.GetIndexingStatus(indexName)
};
}
}

View File

@@ -11,7 +11,6 @@ using Orchard.Indexing.Models;
using Orchard.Indexing.Settings;
using Orchard.Logging;
using Orchard.Services;
using Orchard.Tasks.Indexing;
namespace Orchard.Indexing.Services {
/// <summary>
@@ -22,35 +21,33 @@ namespace Orchard.Indexing.Services {
/// and singleton locks would not be shared across those two.
/// </remarks>
[UsedImplicitly]
public class IndexingTaskExecutor : IIndexNotifierHandler {
private readonly IClock _clock;
public class IndexingTaskExecutor : IIndexNotifierHandler, IIndexStatisticsProvider {
private readonly IRepository<IndexingTaskRecord> _repository;
private IIndexProvider _indexProvider;
private readonly IIndexManager _indexManager;
private readonly IIndexingTaskManager _indexingTaskManager;
private readonly IContentManager _contentManager;
private readonly IAppDataFolder _appDataFolder;
private readonly ShellSettings _shellSettings;
private readonly ILockFileManager _lockFileManager;
private readonly IClock _clock;
private const int ContentItemsPerLoop = 100;
private IndexingStatus _indexingStatus = IndexingStatus.Idle;
public IndexingTaskExecutor(
IClock clock,
IRepository<IndexingTaskRecord> repository,
IIndexManager indexManager,
IIndexingTaskManager indexingTaskManager,
IContentManager contentManager,
IAppDataFolder appDataFolder,
ShellSettings shellSettings,
ILockFileManager lockFileManager) {
_clock = clock;
ILockFileManager lockFileManager,
IClock clock) {
_repository = repository;
_indexManager = indexManager;
_indexingTaskManager = indexingTaskManager;
_contentManager = contentManager;
_appDataFolder = appDataFolder;
_shellSettings = shellSettings;
_lockFileManager = lockFileManager;
_clock = clock;
Logger = NullLogger.Instance;
}
@@ -61,120 +58,192 @@ namespace Orchard.Indexing.Services {
var settingsFilename = GetSettingsFileName(indexName);
var lockFilename = settingsFilename + ".lock";
// acquire a lock file on the index
if (!_lockFileManager.TryAcquireLock(lockFilename, ref lockFile)) {
Logger.Information("Index was requested but was already running");
Logger.Information("Index was requested but is already running");
return;
}
using (lockFile) {
using (lockFile)
{
if (!_indexManager.HasIndexProvider()) {
return;
}
// load index settings to know what is the current state of indexing
var indexSettings = LoadSettings(indexName);
_indexProvider = _indexManager.GetSearchIndexProvider();
var updateIndexDocuments = new List<IDocumentIndex>();
var addedContentItemIds = new List<string>();
DateTime? lastIndexUtc;
// Do we need to rebuild the full index (first time module is used, or rebuild index requested) ?
if (_indexProvider.IsEmpty(indexName)) {
Logger.Information("Rebuild index started");
// mark current last task, as we should process older ones (in case of rebuild index only)
lastIndexUtc = _indexingTaskManager.GetLastTaskDateTime();
// get every existing content item to index it
foreach (var contentItem in _contentManager.Query(VersionOptions.Published).List()) {
try {
// skip items which are not indexed
var settings = GetTypeIndexingSettings(contentItem);
if (!settings.Included)
continue;
var documentIndex = _indexProvider.New(contentItem.Id);
_contentManager.Index(contentItem, documentIndex);
if (documentIndex.IsDirty) {
updateIndexDocuments.Add(documentIndex);
addedContentItemIds.Add(contentItem.Id.ToString());
}
}
catch (Exception ex) {
Logger.Warning(ex, "Unable to index content item #{0} during rebuild", contentItem.Id);
}
}
}
else {
// retrieve last processed index time
lastIndexUtc = _indexProvider.GetLastIndexUtc(indexName);
}
_indexProvider.SetLastIndexUtc(indexName, _clock.UtcNow);
// retrieve not yet processed tasks
var taskRecords = lastIndexUtc == null
? _repository.Fetch(x => true).ToArray()
: _repository.Fetch(x => x.CreatedUtc >= lastIndexUtc).ToArray(); // CreatedUtc and lastIndexUtc might be equal if a content item is created in a background task
// nothing to do ?)))
if (taskRecords.Length + updateIndexDocuments.Count == 0) {
Logger.Information("Index update requested, nothing to do");
return;
}
Logger.Information("Processing {0} indexing tasks", taskRecords.Length);
// should the index be rebuilt
if (!_indexProvider.Exists(indexName)) {
_indexProvider.CreateIndex(indexName);
indexSettings = new IndexSettings();
}
// process Delete tasks
try {
var deleteIds = taskRecords.Where(t => t.Action == IndexingTaskRecord.Delete).Select(t => t.ContentItemRecord.Id).ToArray();
if (deleteIds.Length > 0) {
_indexProvider.Delete(indexName, deleteIds);
Logger.Information("Deleted content items from index: {0}", String.Join(", ", deleteIds));
}
}
catch (Exception ex) {
Logger.Warning(ex, "An error occured while removing a document from the index");
}
// execute indexing commands by batch of [ContentItemsPerLoop] content items
for (; ; ){
var addToIndex = new List<IDocumentIndex>();
var deleteFromIndex = new List<int>();
// process Update tasks
foreach (var taskRecord in taskRecords.Where(t => t.Action == IndexingTaskRecord.Update)) {
var task = new IndexingTask(_contentManager, taskRecord);
// Rebuilding the index ?
if (indexSettings.Mode == IndexingMode.Rebuild) {
Logger.Information("Rebuilding index");
_indexingStatus = IndexingStatus.Rebuilding;
// skip items which are not indexed
var settings = GetTypeIndexingSettings(task.ContentItem);
if (!settings.Included)
continue;
// store the last inserted task
var lastIndexId = _repository
.Fetch(x => true)
.OrderByDescending(x => x.Id)
.Select(x => x.Id)
.FirstOrDefault();
try {
var documentIndex = _indexProvider.New(task.ContentItem.Id);
_contentManager.Index(task.ContentItem, documentIndex);
if (!addedContentItemIds.Contains(task.ContentItem.Id.ToString()) && documentIndex.IsDirty) {
updateIndexDocuments.Add(documentIndex);
// load all content items
var contentItemIds = _contentManager
.Query(VersionOptions.Published)
.List()
.Where(x => x.Id > indexSettings.LastContentId)
.OrderBy(x => x.Id)
.Select(x => x.Id)
.Distinct()
.Take(ContentItemsPerLoop)
.ToArray();
indexSettings.LastIndexedId = lastIndexId;
// if no more elements to index, switch to update mode
if (contentItemIds.Length == 0) {
indexSettings.Mode = IndexingMode.Update;
}
}
catch (Exception ex) {
Logger.Warning(ex, "Unable to process indexing task #{0}", taskRecord.Id);
}
}
foreach (var id in contentItemIds) {
try {
IDocumentIndex documentIndex = ExtractDocumentIndex(id);
if (updateIndexDocuments.Count > 0) {
if (documentIndex != null && documentIndex.IsDirty) {
addToIndex.Add(documentIndex);
}
// store the last processed element
indexSettings.LastContentId = contentItemIds.LastOrDefault();
}
catch (Exception ex) {
Logger.Warning(ex, "Unable to index content item #{0} during rebuild", id);
}
}
}
if (indexSettings.Mode == IndexingMode.Update) {
Logger.Information("Updating index");
_indexingStatus = IndexingStatus.Updating;
// load next content items to index, by filtering and ordering on the task id
var lastIndexId = _repository
.Fetch(x => x.Id > indexSettings.LastIndexedId)
.OrderByDescending(x => x.Id)
.Select(x => x.Id)
.FirstOrDefault();
var contentItemIds = _repository
.Fetch(x => x.Id > indexSettings.LastIndexedId)
.OrderBy(x => x.Id)
.Take(ContentItemsPerLoop)
.Select(x => x.ContentItemRecord.Id)
.Distinct() // don't process the same content item twice
.ToArray();
indexSettings.LastIndexedId = lastIndexId;
foreach (var id in contentItemIds) {
try {
IDocumentIndex documentIndex = ExtractDocumentIndex(id);
if (documentIndex == null) {
deleteFromIndex.Add(id);
}
else if (documentIndex.IsDirty) {
addToIndex.Add(documentIndex);
}
}
catch (Exception ex) {
Logger.Warning(ex, "Unable to index content item #{0} during rebuild", id);
}
}
}
// save current state of the index
indexSettings.LastIndexedUtc = _clock.UtcNow;
_appDataFolder.CreateFile(settingsFilename, indexSettings.ToString());
if (deleteFromIndex.Count == 0 && addToIndex.Count == 0) {
// nothing more to do
_indexingStatus = IndexingStatus.Idle;
return;
}
// save new and updated documents to the index
try {
_indexProvider.Store(indexName, updateIndexDocuments);
Logger.Information("Added content items to index: {0}", String.Join(", ", addedContentItemIds));
if (addToIndex.Count > 0) {
_indexProvider.Store(indexName, addToIndex);
Logger.Information("Added content items to index: {0}", addToIndex.Count);
}
}
catch (Exception ex) {
Logger.Warning(ex, "An error occured while adding a document to the index");
}
// removing documents from the index
try {
if (deleteFromIndex.Count > 0) {
_indexProvider.Delete(indexName, deleteFromIndex);
Logger.Information("Added content items to index: {0}", addToIndex.Count);
}
}
catch (Exception ex) {
Logger.Warning(ex, "An error occured while removing a document from the index");
}
}
}
}
/// <summary>
/// Reads the persisted settings of an index from its settings file.
/// When the file does not exist, a new default <see cref="IndexSettings"/> is returned.
/// </summary>
/// <param name="indexName">Name of the index whose settings are loaded.</param>
/// <returns>The parsed settings, or a default instance if no settings file exists.</returns>
public IndexSettings LoadSettings(string indexName) {
    var path = GetSettingsFileName(indexName);

    // no settings stored yet: fall back to defaults
    if (!_appDataFolder.FileExists(path)) {
        return new IndexSettings();
    }

    return IndexSettings.Parse(_appDataFolder.ReadFile(path));
}
/// <summary>
/// Builds an <see cref="IDocumentIndex"/> for the content item with the given id.
/// Returns null when the item cannot be loaded as published, is not published,
/// or belongs to a type whose indexing settings do not include it.
/// </summary>
/// <param name="id">Id of the content item to index.</param>
private IDocumentIndex ExtractDocumentIndex(int id) {
    var item = _contentManager.Get(id, VersionOptions.Published);

    // deleted items are ignored
    if (item == null) {
        return null;
    }

    // so are items which are not published
    if (!item.IsPublished()) {
        return null;
    }

    // types which are not indexed are skipped
    if (!GetTypeIndexingSettings(item).Included) {
        return null;
    }

    var document = _indexProvider.New(item.Id);

    // let every handler contribute its content to the document
    _contentManager.Index(item, document);
    return document;
}
static TypeIndexing GetTypeIndexingSettings(ContentItem contentItem) {
if (contentItem == null ||
contentItem.TypeDefinition == null ||
@@ -187,5 +256,14 @@ namespace Orchard.Indexing.Services {
/// <summary>
/// Builds the path of the settings file for an index by combining "Sites",
/// the shell name and "{indexName}.settings.xml" through the app data folder.
/// </summary>
/// <param name="indexName">Name of the index.</param>
private string GetSettingsFileName(string indexName) {
    var fileName = indexName + ".settings.xml";
    return _appDataFolder.Combine("Sites", _shellSettings.Name, fileName);
}
/// <summary>
/// Returns the LastIndexedUtc value stored in the settings of the given index.
/// The settings are loaded through <see cref="LoadSettings"/> on every call.
/// </summary>
/// <param name="indexName">Name of the index to query.</param>
public DateTime GetLastIndexedUtc(string indexName) {
    return LoadSettings(indexName).LastIndexedUtc;
}
/// <summary>
/// Returns the indexing status currently held in this instance's status field.
/// </summary>
/// <param name="indexName">Name of the index; this implementation does not use it and returns the same status for any name.</param>
public IndexingStatus GetIndexingStatus(string indexName) {
return _indexingStatus;
}
}
}

View File

@@ -30,8 +30,6 @@ namespace Orchard.Indexing.Services {
throw new ArgumentNullException("contentItem");
}
DeleteTasks(contentItem);
var taskRecord = new IndexingTaskRecord {
CreatedUtc = _clock.UtcNow,
ContentItemRecord = contentItem.Record,
@@ -57,19 +55,5 @@ namespace Orchard.Indexing.Services {
/// <summary>
/// Returns the most recent CreatedUtc value among the stored indexing tasks,
/// or January 1st, 1980 when the query yields no value.
/// </summary>
public DateTime GetLastTaskDateTime() {
    var latest = _repository.Table.Max(t => t.CreatedUtc);
    if (latest.HasValue) {
        return latest.Value;
    }

    // no usable date: fall back to a fixed date in the past
    return new DateTime(1980, 1, 1);
}
/// <summary>
/// Removes existing tasks for the specified content item
/// </summary>
public void DeleteTasks(ContentItem contentItem) {
var tasks = _repository
.Fetch(x => x.ContentItemRecord.Id == contentItem.Id)
.ToArray();
foreach (var task in tasks) {
_repository.Delete(task);
}
_repository.Flush();
}
}
}