mirror of
https://github.com/mindoc-org/mindoc.git
synced 2026-01-22 21:02:12 +08:00
搭建框架
This commit is contained in:
8
vendor/github.com/huichen/murmur/README.md
generated
vendored
Normal file
8
vendor/github.com/huichen/murmur/README.md
generated
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
murmur
|
||||
======
|
||||
|
||||
Go Murmur3 hash implementation
|
||||
|
||||
Based on
|
||||
|
||||
http://en.wikipedia.org/wiki/MurmurHash
|
||||
13
vendor/github.com/huichen/murmur/license.txt
generated
vendored
Normal file
13
vendor/github.com/huichen/murmur/license.txt
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
Copyright 2013 Hui Chen
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
58
vendor/github.com/huichen/murmur/murmur.go
generated
vendored
Normal file
58
vendor/github.com/huichen/murmur/murmur.go
generated
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
// Murmur3 32bit hash function based on
|
||||
// http://en.wikipedia.org/wiki/MurmurHash
|
||||
package murmur
|
||||
|
||||
const (
|
||||
c1 = 0xcc9e2d51
|
||||
c2 = 0x1b873593
|
||||
c3 = 0x85ebca6b
|
||||
c4 = 0xc2b2ae35
|
||||
r1 = 15
|
||||
r2 = 13
|
||||
m = 5
|
||||
n = 0xe6546b64
|
||||
)
|
||||
|
||||
var (
|
||||
Seed = uint32(1)
|
||||
)
|
||||
|
||||
func Murmur3(key []byte) (hash uint32) {
|
||||
hash = Seed
|
||||
iByte := 0
|
||||
for ; iByte+4 <= len(key); iByte += 4 {
|
||||
k := uint32(key[iByte]) | uint32(key[iByte+1])<<8 | uint32(key[iByte+2])<<16 | uint32(key[iByte+3])<<24
|
||||
k *= c1
|
||||
k = (k << r1) | (k >> (32 - r1))
|
||||
k *= c2
|
||||
hash ^= k
|
||||
hash = (hash << r2) | (hash >> (32 - r2))
|
||||
hash = hash*m + n
|
||||
}
|
||||
|
||||
var remainingBytes uint32
|
||||
switch len(key) - iByte {
|
||||
case 3:
|
||||
remainingBytes += uint32(key[iByte+2]) << 16
|
||||
fallthrough
|
||||
case 2:
|
||||
remainingBytes += uint32(key[iByte+1]) << 8
|
||||
fallthrough
|
||||
case 1:
|
||||
remainingBytes += uint32(key[iByte])
|
||||
remainingBytes *= c1
|
||||
remainingBytes = (remainingBytes << r1) | (remainingBytes >> (32 - r1))
|
||||
remainingBytes = remainingBytes * c2
|
||||
hash ^= remainingBytes
|
||||
}
|
||||
|
||||
hash ^= uint32(len(key))
|
||||
hash ^= hash >> 16
|
||||
hash *= c3
|
||||
hash ^= hash >> 13
|
||||
hash *= c4
|
||||
hash ^= hash >> 16
|
||||
|
||||
// 出发吧,狗嬷嬷!
|
||||
return
|
||||
}
|
||||
43
vendor/github.com/huichen/sego/README.md
generated
vendored
Normal file
43
vendor/github.com/huichen/sego/README.md
generated
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
sego
|
||||
====
|
||||
|
||||
Go中文分词
|
||||
|
||||
<a href="https://github.com/huichen/sego/blob/master/dictionary.go">词典</a>用双数组trie(Double-Array Trie)实现,
|
||||
<a href="https://github.com/huichen/sego/blob/master/segmenter.go">分词器</a>算法为基于词频的最短路径加动态规划。
|
||||
|
||||
支持普通和搜索引擎两种分词模式,支持用户词典、词性标注,可运行<a href="https://github.com/huichen/sego/blob/master/server/server.go">JSON RPC服务</a>。
|
||||
|
||||
分词速度<a href="https://github.com/huichen/sego/blob/master/tools/benchmark.go">单线程</a>9MB/s,<a href="https://github.com/huichen/sego/blob/master/tools/goroutines.go">goroutines并发</a>42MB/s(8核Macbook Pro)。
|
||||
|
||||
# 安装/更新
|
||||
|
||||
```
|
||||
go get -u github.com/huichen/sego
|
||||
```
|
||||
|
||||
# 使用
|
||||
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/huichen/sego"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// 载入词典
|
||||
var segmenter sego.Segmenter
|
||||
segmenter.LoadDictionary("github.com/huichen/sego/data/dictionary.txt")
|
||||
|
||||
// 分词
|
||||
text := []byte("中华人民共和国中央人民政府")
|
||||
segments := segmenter.Segment(text)
|
||||
|
||||
// 处理分词结果
|
||||
// 支持普通模式和搜索模式两种分词,见代码中SegmentsToString函数的注释。
|
||||
fmt.Println(sego.SegmentsToString(segments, false))
|
||||
}
|
||||
```
|
||||
65
vendor/github.com/huichen/sego/dictionary.go
generated
vendored
Normal file
65
vendor/github.com/huichen/sego/dictionary.go
generated
vendored
Normal file
@@ -0,0 +1,65 @@
|
||||
package sego
|
||||
|
||||
import "github.com/adamzy/cedar-go"
|
||||
|
||||
// Dictionary结构体实现了一个字串前缀树,一个分词可能出现在叶子节点也有可能出现在非叶节点
|
||||
type Dictionary struct {
|
||||
trie *cedar.Cedar // Cedar 前缀树
|
||||
maxTokenLength int // 词典中最长的分词
|
||||
tokens []Token // 词典中所有的分词,方便遍历
|
||||
totalFrequency int64 // 词典中所有分词的频率之和
|
||||
}
|
||||
|
||||
func NewDictionary() *Dictionary {
|
||||
return &Dictionary{trie: cedar.New()}
|
||||
}
|
||||
|
||||
// 词典中最长的分词
|
||||
func (dict *Dictionary) MaxTokenLength() int {
|
||||
return dict.maxTokenLength
|
||||
}
|
||||
|
||||
// 词典中分词数目
|
||||
func (dict *Dictionary) NumTokens() int {
|
||||
return len(dict.tokens)
|
||||
}
|
||||
|
||||
// 词典中所有分词的频率之和
|
||||
func (dict *Dictionary) TotalFrequency() int64 {
|
||||
return dict.totalFrequency
|
||||
}
|
||||
|
||||
// 向词典中加入一个分词
|
||||
func (dict *Dictionary) addToken(token Token) {
|
||||
bytes := textSliceToBytes(token.text)
|
||||
_, err := dict.trie.Get(bytes)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
dict.trie.Insert(bytes, dict.NumTokens())
|
||||
dict.tokens = append(dict.tokens, token)
|
||||
dict.totalFrequency += int64(token.frequency)
|
||||
if len(token.text) > dict.maxTokenLength {
|
||||
dict.maxTokenLength = len(token.text)
|
||||
}
|
||||
}
|
||||
|
||||
// 在词典中查找和字元组words可以前缀匹配的所有分词
|
||||
// 返回值为找到的分词数
|
||||
func (dict *Dictionary) lookupTokens(words []Text, tokens []*Token) (numOfTokens int) {
|
||||
var id, value int
|
||||
var err error
|
||||
for _, word := range words {
|
||||
id, err = dict.trie.Jump(word, id)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
value, err = dict.trie.Value(id)
|
||||
if err == nil {
|
||||
tokens[numOfTokens] = &dict.tokens[value]
|
||||
numOfTokens++
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
13
vendor/github.com/huichen/sego/license.txt
generated
vendored
Normal file
13
vendor/github.com/huichen/sego/license.txt
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
Copyright 2013 Hui Chen
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
28
vendor/github.com/huichen/sego/segment.go
generated
vendored
Normal file
28
vendor/github.com/huichen/sego/segment.go
generated
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
package sego
|
||||
|
||||
// 文本中的一个分词
|
||||
type Segment struct {
|
||||
// 分词在文本中的起始字节位置
|
||||
start int
|
||||
|
||||
// 分词在文本中的结束字节位置(不包括该位置)
|
||||
end int
|
||||
|
||||
// 分词信息
|
||||
token *Token
|
||||
}
|
||||
|
||||
// 返回分词在文本中的起始字节位置
|
||||
func (s *Segment) Start() int {
|
||||
return s.start
|
||||
}
|
||||
|
||||
// 返回分词在文本中的结束字节位置(不包括该位置)
|
||||
func (s *Segment) End() int {
|
||||
return s.end
|
||||
}
|
||||
|
||||
// 返回分词信息
|
||||
func (s *Segment) Token() *Token {
|
||||
return s.token
|
||||
}
|
||||
295
vendor/github.com/huichen/sego/segmenter.go
generated
vendored
Normal file
295
vendor/github.com/huichen/sego/segmenter.go
generated
vendored
Normal file
@@ -0,0 +1,295 @@
|
||||
//Go中文分词
|
||||
package sego
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
|
||||
minTokenFrequency = 2 // 仅从字典文件中读取大于等于此频率的分词
|
||||
)
|
||||
|
||||
// 分词器结构体
|
||||
type Segmenter struct {
|
||||
dict *Dictionary
|
||||
}
|
||||
|
||||
// 该结构体用于记录Viterbi算法中某字元处的向前分词跳转信息
|
||||
type jumper struct {
|
||||
minDistance float32
|
||||
token *Token
|
||||
}
|
||||
|
||||
// 返回分词器使用的词典
|
||||
func (seg *Segmenter) Dictionary() *Dictionary {
|
||||
return seg.dict
|
||||
}
|
||||
|
||||
// 从文件中载入词典
|
||||
//
|
||||
// 可以载入多个词典文件,文件名用","分隔,排在前面的词典优先载入分词,比如
|
||||
// "用户词典.txt,通用词典.txt"
|
||||
// 当一个分词既出现在用户词典也出现在通用词典中,则优先使用用户词典。
|
||||
//
|
||||
// 词典的格式为(每个分词一行):
|
||||
// 分词文本 频率 词性
|
||||
func (seg *Segmenter) LoadDictionary(files string) {
|
||||
seg.dict = NewDictionary()
|
||||
for _, file := range strings.Split(files, ",") {
|
||||
log.Printf("载入sego词典 %s", file)
|
||||
dictFile, err := os.Open(file)
|
||||
defer dictFile.Close()
|
||||
if err != nil {
|
||||
log.Fatalf("无法载入字典文件 \"%s\" \n", file)
|
||||
}
|
||||
|
||||
reader := bufio.NewReader(dictFile)
|
||||
var text string
|
||||
var freqText string
|
||||
var frequency int
|
||||
var pos string
|
||||
|
||||
// 逐行读入分词
|
||||
for {
|
||||
size, _ := fmt.Fscanln(reader, &text, &freqText, &pos)
|
||||
|
||||
if size == 0 {
|
||||
// 文件结束
|
||||
break
|
||||
} else if size < 2 {
|
||||
// 无效行
|
||||
continue
|
||||
} else if size == 2 {
|
||||
// 没有词性标注时设为空字符串
|
||||
pos = ""
|
||||
}
|
||||
|
||||
// 解析词频
|
||||
var err error
|
||||
frequency, err = strconv.Atoi(freqText)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// 过滤频率太小的词
|
||||
if frequency < minTokenFrequency {
|
||||
continue
|
||||
}
|
||||
|
||||
// 将分词添加到字典中
|
||||
words := splitTextToWords([]byte(text))
|
||||
token := Token{text: words, frequency: frequency, pos: pos}
|
||||
seg.dict.addToken(token)
|
||||
}
|
||||
}
|
||||
|
||||
// 计算每个分词的路径值,路径值含义见Token结构体的注释
|
||||
logTotalFrequency := float32(math.Log2(float64(seg.dict.totalFrequency)))
|
||||
for i := range seg.dict.tokens {
|
||||
token := &seg.dict.tokens[i]
|
||||
token.distance = logTotalFrequency - float32(math.Log2(float64(token.frequency)))
|
||||
}
|
||||
|
||||
// 对每个分词进行细致划分,用于搜索引擎模式,该模式用法见Token结构体的注释。
|
||||
for i := range seg.dict.tokens {
|
||||
token := &seg.dict.tokens[i]
|
||||
segments := seg.segmentWords(token.text, true)
|
||||
|
||||
// 计算需要添加的子分词数目
|
||||
numTokensToAdd := 0
|
||||
for iToken := 0; iToken < len(segments); iToken++ {
|
||||
if len(segments[iToken].token.text) > 1 {
|
||||
// 略去字元长度为一的分词
|
||||
// TODO: 这值得进一步推敲,特别是当字典中有英文复合词的时候
|
||||
numTokensToAdd++
|
||||
}
|
||||
}
|
||||
token.segments = make([]*Segment, numTokensToAdd)
|
||||
|
||||
// 添加子分词
|
||||
iSegmentsToAdd := 0
|
||||
for iToken := 0; iToken < len(segments); iToken++ {
|
||||
if len(segments[iToken].token.text) > 1 {
|
||||
token.segments[iSegmentsToAdd] = &segments[iToken]
|
||||
iSegmentsToAdd++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Println("sego词典载入完毕")
|
||||
}
|
||||
|
||||
// 对文本分词
|
||||
//
|
||||
// 输入参数:
|
||||
// bytes UTF8文本的字节数组
|
||||
//
|
||||
// 输出:
|
||||
// []Segment 划分的分词
|
||||
func (seg *Segmenter) Segment(bytes []byte) []Segment {
|
||||
return seg.internalSegment(bytes, false)
|
||||
}
|
||||
|
||||
func (seg *Segmenter) internalSegment(bytes []byte, searchMode bool) []Segment {
|
||||
// 处理特殊情况
|
||||
if len(bytes) == 0 {
|
||||
return []Segment{}
|
||||
}
|
||||
|
||||
// 划分字元
|
||||
text := splitTextToWords(bytes)
|
||||
|
||||
return seg.segmentWords(text, searchMode)
|
||||
}
|
||||
|
||||
func (seg *Segmenter) segmentWords(text []Text, searchMode bool) []Segment {
|
||||
// 搜索模式下该分词已无继续划分可能的情况
|
||||
if searchMode && len(text) == 1 {
|
||||
return []Segment{}
|
||||
}
|
||||
|
||||
// jumpers定义了每个字元处的向前跳转信息,包括这个跳转对应的分词,
|
||||
// 以及从文本段开始到该字元的最短路径值
|
||||
jumpers := make([]jumper, len(text))
|
||||
|
||||
tokens := make([]*Token, seg.dict.maxTokenLength)
|
||||
for current := 0; current < len(text); current++ {
|
||||
// 找到前一个字元处的最短路径,以便计算后续路径值
|
||||
var baseDistance float32
|
||||
if current == 0 {
|
||||
// 当本字元在文本首部时,基础距离应该是零
|
||||
baseDistance = 0
|
||||
} else {
|
||||
baseDistance = jumpers[current-1].minDistance
|
||||
}
|
||||
|
||||
// 寻找所有以当前字元开头的分词
|
||||
numTokens := seg.dict.lookupTokens(
|
||||
text[current:minInt(current+seg.dict.maxTokenLength, len(text))], tokens)
|
||||
|
||||
// 对所有可能的分词,更新分词结束字元处的跳转信息
|
||||
for iToken := 0; iToken < numTokens; iToken++ {
|
||||
location := current + len(tokens[iToken].text) - 1
|
||||
if !searchMode || current != 0 || location != len(text)-1 {
|
||||
updateJumper(&jumpers[location], baseDistance, tokens[iToken])
|
||||
}
|
||||
}
|
||||
|
||||
// 当前字元没有对应分词时补加一个伪分词
|
||||
if numTokens == 0 || len(tokens[0].text) > 1 {
|
||||
updateJumper(&jumpers[current], baseDistance,
|
||||
&Token{text: []Text{text[current]}, frequency: 1, distance: 32, pos: "x"})
|
||||
}
|
||||
}
|
||||
|
||||
// 从后向前扫描第一遍得到需要添加的分词数目
|
||||
numSeg := 0
|
||||
for index := len(text) - 1; index >= 0; {
|
||||
location := index - len(jumpers[index].token.text) + 1
|
||||
numSeg++
|
||||
index = location - 1
|
||||
}
|
||||
|
||||
// 从后向前扫描第二遍添加分词到最终结果
|
||||
outputSegments := make([]Segment, numSeg)
|
||||
for index := len(text) - 1; index >= 0; {
|
||||
location := index - len(jumpers[index].token.text) + 1
|
||||
numSeg--
|
||||
outputSegments[numSeg].token = jumpers[index].token
|
||||
index = location - 1
|
||||
}
|
||||
|
||||
// 计算各个分词的字节位置
|
||||
bytePosition := 0
|
||||
for iSeg := 0; iSeg < len(outputSegments); iSeg++ {
|
||||
outputSegments[iSeg].start = bytePosition
|
||||
bytePosition += textSliceByteLength(outputSegments[iSeg].token.text)
|
||||
outputSegments[iSeg].end = bytePosition
|
||||
}
|
||||
return outputSegments
|
||||
}
|
||||
|
||||
// 更新跳转信息:
|
||||
// 1. 当该位置从未被访问过时(jumper.minDistance为零的情况),或者
|
||||
// 2. 当该位置的当前最短路径大于新的最短路径时
|
||||
// 将当前位置的最短路径值更新为baseDistance加上新分词的概率
|
||||
func updateJumper(jumper *jumper, baseDistance float32, token *Token) {
|
||||
newDistance := baseDistance + token.distance
|
||||
if jumper.minDistance == 0 || jumper.minDistance > newDistance {
|
||||
jumper.minDistance = newDistance
|
||||
jumper.token = token
|
||||
}
|
||||
}
|
||||
|
||||
// 取两整数较小值
|
||||
func minInt(a, b int) int {
|
||||
if a > b {
|
||||
return b
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
// 取两整数较大值
|
||||
func maxInt(a, b int) int {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// 将文本划分成字元
|
||||
func splitTextToWords(text Text) []Text {
|
||||
output := make([]Text, 0, len(text)/3)
|
||||
current := 0
|
||||
inAlphanumeric := true
|
||||
alphanumericStart := 0
|
||||
for current < len(text) {
|
||||
r, size := utf8.DecodeRune(text[current:])
|
||||
if size <= 2 && (unicode.IsLetter(r) || unicode.IsNumber(r)) {
|
||||
// 当前是拉丁字母或数字(非中日韩文字)
|
||||
if !inAlphanumeric {
|
||||
alphanumericStart = current
|
||||
inAlphanumeric = true
|
||||
}
|
||||
} else {
|
||||
if inAlphanumeric {
|
||||
inAlphanumeric = false
|
||||
if current != 0 {
|
||||
output = append(output, toLower(text[alphanumericStart:current]))
|
||||
}
|
||||
}
|
||||
output = append(output, text[current:current+size])
|
||||
}
|
||||
current += size
|
||||
}
|
||||
|
||||
// 处理最后一个字元是英文的情况
|
||||
if inAlphanumeric {
|
||||
if current != 0 {
|
||||
output = append(output, toLower(text[alphanumericStart:current]))
|
||||
}
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
// 将英文词转化为小写
|
||||
func toLower(text []byte) []byte {
|
||||
output := make([]byte, len(text))
|
||||
for i, t := range text {
|
||||
if t >= 'A' && t <= 'Z' {
|
||||
output[i] = t - 'A' + 'a'
|
||||
} else {
|
||||
output[i] = t
|
||||
}
|
||||
}
|
||||
return output
|
||||
}
|
||||
38
vendor/github.com/huichen/sego/test_utils.go
generated
vendored
Normal file
38
vendor/github.com/huichen/sego/test_utils.go
generated
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
package sego
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func expect(t *testing.T, expect string, actual interface{}) {
|
||||
actualString := fmt.Sprint(actual)
|
||||
if expect != actualString {
|
||||
t.Errorf("期待值=\"%s\", 实际=\"%s\"", expect, actualString)
|
||||
}
|
||||
}
|
||||
|
||||
func printTokens(tokens []*Token, numTokens int) (output string) {
|
||||
for iToken := 0; iToken < numTokens; iToken++ {
|
||||
for _, word := range tokens[iToken].text {
|
||||
output += fmt.Sprint(string(word))
|
||||
}
|
||||
output += " "
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func toWords(strings ...string) []Text {
|
||||
words := []Text{}
|
||||
for _, s := range strings {
|
||||
words = append(words, []byte(s))
|
||||
}
|
||||
return words
|
||||
}
|
||||
|
||||
func bytesToString(bytes []Text) (output string) {
|
||||
for _, b := range bytes {
|
||||
output += (string(b) + "/")
|
||||
}
|
||||
return
|
||||
}
|
||||
50
vendor/github.com/huichen/sego/token.go
generated
vendored
Normal file
50
vendor/github.com/huichen/sego/token.go
generated
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
package sego
|
||||
|
||||
// 字串类型,可以用来表达
|
||||
// 1. 一个字元,比如"中"又如"国", 英文的一个字元是一个词
|
||||
// 2. 一个分词,比如"中国"又如"人口"
|
||||
// 3. 一段文字,比如"中国有十三亿人口"
|
||||
type Text []byte
|
||||
|
||||
// 一个分词
|
||||
type Token struct {
|
||||
// 分词的字串,这实际上是个字元数组
|
||||
text []Text
|
||||
|
||||
// 分词在语料库中的词频
|
||||
frequency int
|
||||
|
||||
// log2(总词频/该分词词频),这相当于log2(1/p(分词)),用作动态规划中
|
||||
// 该分词的路径长度。求解prod(p(分词))的最大值相当于求解
|
||||
// sum(distance(分词))的最小值,这就是“最短路径”的来历。
|
||||
distance float32
|
||||
|
||||
// 词性标注
|
||||
pos string
|
||||
|
||||
// 该分词文本的进一步分词划分,见Segments函数注释。
|
||||
segments []*Segment
|
||||
}
|
||||
|
||||
// 返回分词文本
|
||||
func (token *Token) Text() string {
|
||||
return textSliceToString(token.text)
|
||||
}
|
||||
|
||||
// 返回分词在语料库中的词频
|
||||
func (token *Token) Frequency() int {
|
||||
return token.frequency
|
||||
}
|
||||
|
||||
// 返回分词词性标注
|
||||
func (token *Token) Pos() string {
|
||||
return token.pos
|
||||
}
|
||||
|
||||
// 该分词文本的进一步分词划分,比如"中华人民共和国中央人民政府"这个分词
|
||||
// 有两个子分词"中华人民共和国"和"中央人民政府"。子分词也可以进一步有子分词
|
||||
// 形成一个树结构,遍历这个树就可以得到该分词的所有细致分词划分,这主要
|
||||
// 用于搜索引擎对一段文本进行全文搜索。
|
||||
func (token *Token) Segments() []*Segment {
|
||||
return token.segments
|
||||
}
|
||||
93
vendor/github.com/huichen/sego/utils.go
generated
vendored
Normal file
93
vendor/github.com/huichen/sego/utils.go
generated
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
package sego
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// 输出分词结果为字符串
|
||||
//
|
||||
// 有两种输出模式,以"中华人民共和国"为例
|
||||
//
|
||||
// 普通模式(searchMode=false)输出一个分词"中华人民共和国/ns "
|
||||
// 搜索模式(searchMode=true) 输出普通模式的再细致切分:
|
||||
// "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns "
|
||||
//
|
||||
// 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见Token结构体的注释。
|
||||
func SegmentsToString(segs []Segment, searchMode bool) (output string) {
|
||||
if searchMode {
|
||||
for _, seg := range segs {
|
||||
output += tokenToString(seg.token)
|
||||
}
|
||||
} else {
|
||||
for _, seg := range segs {
|
||||
output += fmt.Sprintf(
|
||||
"%s/%s ", textSliceToString(seg.token.text), seg.token.pos)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func tokenToString(token *Token) (output string) {
|
||||
for _, s := range token.segments {
|
||||
output += tokenToString(s.token)
|
||||
}
|
||||
output += fmt.Sprintf("%s/%s ", textSliceToString(token.text), token.pos)
|
||||
return
|
||||
}
|
||||
|
||||
// 输出分词结果到一个字符串slice
|
||||
//
|
||||
// 有两种输出模式,以"中华人民共和国"为例
|
||||
//
|
||||
// 普通模式(searchMode=false)输出一个分词"[中华人民共和国]"
|
||||
// 搜索模式(searchMode=true) 输出普通模式的再细致切分:
|
||||
// "[中华 人民 共和 共和国 人民共和国 中华人民共和国]"
|
||||
//
|
||||
// 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见Token结构体的注释。
|
||||
|
||||
func SegmentsToSlice(segs []Segment, searchMode bool) (output []string) {
|
||||
if searchMode {
|
||||
for _, seg := range segs {
|
||||
output = append(output, tokenToSlice(seg.token)...)
|
||||
}
|
||||
} else {
|
||||
for _, seg := range segs {
|
||||
output = append(output, seg.token.Text())
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func tokenToSlice(token *Token) (output []string) {
|
||||
for _, s := range token.segments {
|
||||
output = append(output, tokenToSlice(s.token)...)
|
||||
}
|
||||
output = append(output, textSliceToString(token.text))
|
||||
return output
|
||||
}
|
||||
|
||||
// 将多个字元拼接一个字符串输出
|
||||
func textSliceToString(text []Text) string {
|
||||
var output string
|
||||
for _, word := range text {
|
||||
output += string(word)
|
||||
}
|
||||
return output
|
||||
}
|
||||
|
||||
// 返回多个字元的字节总长度
|
||||
func textSliceByteLength(text []Text) (length int) {
|
||||
for _, word := range text {
|
||||
length += len(word)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func textSliceToBytes(text []Text) []byte {
|
||||
var buf bytes.Buffer
|
||||
for _, word := range text {
|
||||
buf.Write(word)
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
574
vendor/github.com/huichen/wukong/core/indexer.go
generated
vendored
Normal file
574
vendor/github.com/huichen/wukong/core/indexer.go
generated
vendored
Normal file
@@ -0,0 +1,574 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/huichen/wukong/types"
|
||||
"github.com/huichen/wukong/utils"
|
||||
)
|
||||
|
||||
// 索引器
|
||||
type Indexer struct {
|
||||
// 从搜索键到文档列表的反向索引
|
||||
// 加了读写锁以保证读写安全
|
||||
tableLock struct {
|
||||
sync.RWMutex
|
||||
table map[string]*KeywordIndices
|
||||
docsState map[uint64]int // nil: 表示无状态记录,0: 存在于索引中,1: 等待删除,2: 等待加入
|
||||
}
|
||||
addCacheLock struct {
|
||||
sync.RWMutex
|
||||
addCachePointer int
|
||||
addCache types.DocumentsIndex
|
||||
}
|
||||
removeCacheLock struct {
|
||||
sync.RWMutex
|
||||
removeCachePointer int
|
||||
removeCache types.DocumentsId
|
||||
}
|
||||
|
||||
initOptions types.IndexerInitOptions
|
||||
initialized bool
|
||||
|
||||
// 这实际上是总文档数的一个近似
|
||||
numDocuments uint64
|
||||
|
||||
// 所有被索引文本的总关键词数
|
||||
totalTokenLength float32
|
||||
|
||||
// 每个文档的关键词长度
|
||||
docTokenLengths map[uint64]float32
|
||||
}
|
||||
|
||||
// 反向索引表的一行,收集了一个搜索键出现的所有文档,按照DocId从小到大排序。
|
||||
type KeywordIndices struct {
|
||||
// 下面的切片是否为空,取决于初始化时IndexType的值
|
||||
docIds []uint64 // 全部类型都有
|
||||
frequencies []float32 // IndexType == FrequenciesIndex
|
||||
locations [][]int // IndexType == LocationsIndex
|
||||
}
|
||||
|
||||
// 初始化索引器
|
||||
func (indexer *Indexer) Init(options types.IndexerInitOptions) {
|
||||
if indexer.initialized == true {
|
||||
log.Fatal("索引器不能初始化两次")
|
||||
}
|
||||
options.Init()
|
||||
indexer.initOptions = options
|
||||
indexer.initialized = true
|
||||
|
||||
indexer.tableLock.table = make(map[string]*KeywordIndices)
|
||||
indexer.tableLock.docsState = make(map[uint64]int)
|
||||
indexer.addCacheLock.addCache = make([]*types.DocumentIndex, indexer.initOptions.DocCacheSize)
|
||||
indexer.removeCacheLock.removeCache = make([]uint64, indexer.initOptions.DocCacheSize*2)
|
||||
indexer.docTokenLengths = make(map[uint64]float32)
|
||||
}
|
||||
|
||||
// 从KeywordIndices中得到第i个文档的DocId
|
||||
func (indexer *Indexer) getDocId(ti *KeywordIndices, i int) uint64 {
|
||||
return ti.docIds[i]
|
||||
}
|
||||
|
||||
// 得到KeywordIndices中文档总数
|
||||
func (indexer *Indexer) getIndexLength(ti *KeywordIndices) int {
|
||||
return len(ti.docIds)
|
||||
}
|
||||
|
||||
// 向 ADDCACHE 中加入一个文档
|
||||
func (indexer *Indexer) AddDocumentToCache(document *types.DocumentIndex, forceUpdate bool) {
|
||||
if indexer.initialized == false {
|
||||
log.Fatal("索引器尚未初始化")
|
||||
}
|
||||
|
||||
indexer.addCacheLock.Lock()
|
||||
if document != nil {
|
||||
indexer.addCacheLock.addCache[indexer.addCacheLock.addCachePointer] = document
|
||||
indexer.addCacheLock.addCachePointer++
|
||||
}
|
||||
if indexer.addCacheLock.addCachePointer >= indexer.initOptions.DocCacheSize || forceUpdate {
|
||||
indexer.tableLock.Lock()
|
||||
position := 0
|
||||
for i := 0; i < indexer.addCacheLock.addCachePointer; i++ {
|
||||
docIndex := indexer.addCacheLock.addCache[i]
|
||||
if docState, ok := indexer.tableLock.docsState[docIndex.DocId]; ok && docState <= 1 {
|
||||
// ok && docState == 0 表示存在于索引中,需先删除再添加
|
||||
// ok && docState == 1 表示不一定存在于索引中,等待删除,需先删除再添加
|
||||
if position != i {
|
||||
indexer.addCacheLock.addCache[position], indexer.addCacheLock.addCache[i] =
|
||||
indexer.addCacheLock.addCache[i], indexer.addCacheLock.addCache[position]
|
||||
}
|
||||
if docState == 0 {
|
||||
indexer.removeCacheLock.Lock()
|
||||
indexer.removeCacheLock.removeCache[indexer.removeCacheLock.removeCachePointer] =
|
||||
docIndex.DocId
|
||||
indexer.removeCacheLock.removeCachePointer++
|
||||
indexer.removeCacheLock.Unlock()
|
||||
indexer.tableLock.docsState[docIndex.DocId] = 1
|
||||
indexer.numDocuments--
|
||||
}
|
||||
position++
|
||||
} else if !ok {
|
||||
indexer.tableLock.docsState[docIndex.DocId] = 2
|
||||
}
|
||||
}
|
||||
|
||||
indexer.tableLock.Unlock()
|
||||
if indexer.RemoveDocumentToCache(0, forceUpdate) {
|
||||
// 只有当存在于索引表中的文档已被删除,其才可以重新加入到索引表中
|
||||
position = 0
|
||||
}
|
||||
|
||||
addCachedDocuments := indexer.addCacheLock.addCache[position:indexer.addCacheLock.addCachePointer]
|
||||
indexer.addCacheLock.addCachePointer = position
|
||||
indexer.addCacheLock.Unlock()
|
||||
sort.Sort(addCachedDocuments)
|
||||
indexer.AddDocuments(&addCachedDocuments)
|
||||
} else {
|
||||
indexer.addCacheLock.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// 向反向索引表中加入 ADDCACHE 中所有文档
|
||||
func (indexer *Indexer) AddDocuments(documents *types.DocumentsIndex) {
|
||||
if indexer.initialized == false {
|
||||
log.Fatal("索引器尚未初始化")
|
||||
}
|
||||
|
||||
indexer.tableLock.Lock()
|
||||
defer indexer.tableLock.Unlock()
|
||||
indexPointers := make(map[string]int, len(indexer.tableLock.table))
|
||||
|
||||
// DocId 递增顺序遍历插入文档保证索引移动次数最少
|
||||
for i, document := range *documents {
|
||||
if i < len(*documents)-1 && (*documents)[i].DocId == (*documents)[i+1].DocId {
|
||||
// 如果有重复文档加入,因为稳定排序,只加入最后一个
|
||||
continue
|
||||
}
|
||||
if docState, ok := indexer.tableLock.docsState[document.DocId]; ok && docState == 1 {
|
||||
// 如果此时 docState 仍为 1,说明该文档需被删除
|
||||
// docState 合法状态为 nil & 2,保证一定不会插入已经在索引表中的文档
|
||||
continue
|
||||
}
|
||||
|
||||
// 更新文档关键词总长度
|
||||
if document.TokenLength != 0 {
|
||||
indexer.docTokenLengths[document.DocId] = float32(document.TokenLength)
|
||||
indexer.totalTokenLength += document.TokenLength
|
||||
}
|
||||
|
||||
docIdIsNew := true
|
||||
for _, keyword := range document.Keywords {
|
||||
indices, foundKeyword := indexer.tableLock.table[keyword.Text]
|
||||
if !foundKeyword {
|
||||
// 如果没找到该搜索键则加入
|
||||
ti := KeywordIndices{}
|
||||
switch indexer.initOptions.IndexType {
|
||||
case types.LocationsIndex:
|
||||
ti.locations = [][]int{keyword.Starts}
|
||||
case types.FrequenciesIndex:
|
||||
ti.frequencies = []float32{keyword.Frequency}
|
||||
}
|
||||
ti.docIds = []uint64{document.DocId}
|
||||
indexer.tableLock.table[keyword.Text] = &ti
|
||||
continue
|
||||
}
|
||||
|
||||
// 查找应该插入的位置,且索引一定不存在
|
||||
position, _ := indexer.searchIndex(
|
||||
indices, indexPointers[keyword.Text], indexer.getIndexLength(indices)-1, document.DocId)
|
||||
indexPointers[keyword.Text] = position
|
||||
switch indexer.initOptions.IndexType {
|
||||
case types.LocationsIndex:
|
||||
indices.locations = append(indices.locations, []int{})
|
||||
copy(indices.locations[position+1:], indices.locations[position:])
|
||||
indices.locations[position] = keyword.Starts
|
||||
case types.FrequenciesIndex:
|
||||
indices.frequencies = append(indices.frequencies, float32(0))
|
||||
copy(indices.frequencies[position+1:], indices.frequencies[position:])
|
||||
indices.frequencies[position] = keyword.Frequency
|
||||
}
|
||||
indices.docIds = append(indices.docIds, 0)
|
||||
copy(indices.docIds[position+1:], indices.docIds[position:])
|
||||
indices.docIds[position] = document.DocId
|
||||
}
|
||||
|
||||
// 更新文章状态和总数
|
||||
if docIdIsNew {
|
||||
indexer.tableLock.docsState[document.DocId] = 0
|
||||
indexer.numDocuments++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 向 REMOVECACHE 中加入一个待删除文档
|
||||
// 返回值表示文档是否在索引表中被删除
|
||||
func (indexer *Indexer) RemoveDocumentToCache(docId uint64, forceUpdate bool) bool {
|
||||
if indexer.initialized == false {
|
||||
log.Fatal("索引器尚未初始化")
|
||||
}
|
||||
|
||||
indexer.removeCacheLock.Lock()
|
||||
if docId != 0 {
|
||||
indexer.tableLock.Lock()
|
||||
if docState, ok := indexer.tableLock.docsState[docId]; ok && docState == 0 {
|
||||
indexer.removeCacheLock.removeCache[indexer.removeCacheLock.removeCachePointer] = docId
|
||||
indexer.removeCacheLock.removeCachePointer++
|
||||
indexer.tableLock.docsState[docId] = 1
|
||||
indexer.numDocuments--
|
||||
} else if ok && docState == 2 {
|
||||
// 删除一个等待加入的文档
|
||||
indexer.tableLock.docsState[docId] = 1
|
||||
} else if !ok {
|
||||
// 若文档不存在,则无法判断其是否在 addCache 中,需避免这样的操作
|
||||
}
|
||||
indexer.tableLock.Unlock()
|
||||
}
|
||||
|
||||
if indexer.removeCacheLock.removeCachePointer > 0 &&
|
||||
(indexer.removeCacheLock.removeCachePointer >= indexer.initOptions.DocCacheSize ||
|
||||
forceUpdate) {
|
||||
removeCachedDocuments := indexer.removeCacheLock.removeCache[:indexer.removeCacheLock.removeCachePointer]
|
||||
indexer.removeCacheLock.removeCachePointer = 0
|
||||
indexer.removeCacheLock.Unlock()
|
||||
sort.Sort(removeCachedDocuments)
|
||||
indexer.RemoveDocuments(&removeCachedDocuments)
|
||||
return true
|
||||
}
|
||||
indexer.removeCacheLock.Unlock()
|
||||
return false
|
||||
}
|
||||
|
||||
// 向反向索引表中删除 REMOVECACHE 中所有文档
|
||||
func (indexer *Indexer) RemoveDocuments(documents *types.DocumentsId) {
|
||||
if indexer.initialized == false {
|
||||
log.Fatal("索引器尚未初始化")
|
||||
}
|
||||
|
||||
indexer.tableLock.Lock()
|
||||
defer indexer.tableLock.Unlock()
|
||||
|
||||
// 更新文档关键词总长度,删除文档状态
|
||||
for _, docId := range *documents {
|
||||
indexer.totalTokenLength -= indexer.docTokenLengths[docId]
|
||||
delete(indexer.docTokenLengths, docId)
|
||||
delete(indexer.tableLock.docsState, docId)
|
||||
}
|
||||
|
||||
for keyword, indices := range indexer.tableLock.table {
|
||||
indicesTop, indicesPointer := 0, 0
|
||||
documentsPointer := sort.Search(
|
||||
len(*documents), func(i int) bool { return (*documents)[i] >= indices.docIds[0] })
|
||||
// 双指针扫描,进行批量删除操作
|
||||
for documentsPointer < len(*documents) && indicesPointer < indexer.getIndexLength(indices) {
|
||||
if indices.docIds[indicesPointer] < (*documents)[documentsPointer] {
|
||||
if indicesTop != indicesPointer {
|
||||
switch indexer.initOptions.IndexType {
|
||||
case types.LocationsIndex:
|
||||
indices.locations[indicesTop] = indices.locations[indicesPointer]
|
||||
case types.FrequenciesIndex:
|
||||
indices.frequencies[indicesTop] = indices.frequencies[indicesPointer]
|
||||
}
|
||||
indices.docIds[indicesTop] = indices.docIds[indicesPointer]
|
||||
}
|
||||
indicesTop++
|
||||
indicesPointer++
|
||||
} else if indices.docIds[indicesPointer] == (*documents)[documentsPointer] {
|
||||
indicesPointer++
|
||||
documentsPointer++
|
||||
} else {
|
||||
documentsPointer++
|
||||
}
|
||||
}
|
||||
if indicesTop != indicesPointer {
|
||||
switch indexer.initOptions.IndexType {
|
||||
case types.LocationsIndex:
|
||||
indices.locations = append(
|
||||
indices.locations[:indicesTop], indices.locations[indicesPointer:]...)
|
||||
case types.FrequenciesIndex:
|
||||
indices.frequencies = append(
|
||||
indices.frequencies[:indicesTop], indices.frequencies[indicesPointer:]...)
|
||||
}
|
||||
indices.docIds = append(
|
||||
indices.docIds[:indicesTop], indices.docIds[indicesPointer:]...)
|
||||
}
|
||||
if len(indices.docIds) == 0 {
|
||||
delete(indexer.tableLock.table, keyword)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 查找包含全部搜索键(AND操作)的文档
|
||||
// 当docIds不为nil时仅从docIds指定的文档中查找
|
||||
func (indexer *Indexer) Lookup(
|
||||
tokens []string, labels []string, docIds map[uint64]bool, countDocsOnly bool) (docs []types.IndexedDocument, numDocs int) {
|
||||
if indexer.initialized == false {
|
||||
log.Fatal("索引器尚未初始化")
|
||||
}
|
||||
|
||||
if indexer.numDocuments == 0 {
|
||||
return
|
||||
}
|
||||
numDocs = 0
|
||||
|
||||
// 合并关键词和标签为搜索键
|
||||
keywords := make([]string, len(tokens)+len(labels))
|
||||
copy(keywords, tokens)
|
||||
copy(keywords[len(tokens):], labels)
|
||||
|
||||
indexer.tableLock.RLock()
|
||||
defer indexer.tableLock.RUnlock()
|
||||
table := make([]*KeywordIndices, len(keywords))
|
||||
for i, keyword := range keywords {
|
||||
indices, found := indexer.tableLock.table[keyword]
|
||||
if !found {
|
||||
// 当反向索引表中无此搜索键时直接返回
|
||||
return
|
||||
} else {
|
||||
// 否则加入反向表中
|
||||
table[i] = indices
|
||||
}
|
||||
}
|
||||
|
||||
// 当没有找到时直接返回
|
||||
if len(table) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// 归并查找各个搜索键出现文档的交集
|
||||
// 从后向前查保证先输出DocId较大文档
|
||||
indexPointers := make([]int, len(table))
|
||||
for iTable := 0; iTable < len(table); iTable++ {
|
||||
indexPointers[iTable] = indexer.getIndexLength(table[iTable]) - 1
|
||||
}
|
||||
// 平均文本关键词长度,用于计算BM25
|
||||
avgDocLength := indexer.totalTokenLength / float32(indexer.numDocuments)
|
||||
for ; indexPointers[0] >= 0; indexPointers[0]-- {
|
||||
// 以第一个搜索键出现的文档作为基准,并遍历其他搜索键搜索同一文档
|
||||
baseDocId := indexer.getDocId(table[0], indexPointers[0])
|
||||
if docIds != nil {
|
||||
if _, found := docIds[baseDocId]; !found {
|
||||
continue
|
||||
}
|
||||
}
|
||||
iTable := 1
|
||||
found := true
|
||||
for ; iTable < len(table); iTable++ {
|
||||
// 二分法比简单的顺序归并效率高,也有更高效率的算法,
|
||||
// 但顺序归并也许是更好的选择,考虑到将来需要用链表重新实现
|
||||
// 以避免反向表添加新文档时的写锁。
|
||||
// TODO: 进一步研究不同求交集算法的速度和可扩展性。
|
||||
position, foundBaseDocId := indexer.searchIndex(table[iTable],
|
||||
0, indexPointers[iTable], baseDocId)
|
||||
if foundBaseDocId {
|
||||
indexPointers[iTable] = position
|
||||
} else {
|
||||
if position == 0 {
|
||||
// 该搜索键中所有的文档ID都比baseDocId大,因此已经没有
|
||||
// 继续查找的必要。
|
||||
return
|
||||
} else {
|
||||
// 继续下一indexPointers[0]的查找
|
||||
indexPointers[iTable] = position - 1
|
||||
found = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if found {
|
||||
if docState, ok := indexer.tableLock.docsState[baseDocId]; !ok || docState != 0 {
|
||||
continue
|
||||
}
|
||||
indexedDoc := types.IndexedDocument{}
|
||||
|
||||
// 当为LocationsIndex时计算关键词紧邻距离
|
||||
if indexer.initOptions.IndexType == types.LocationsIndex {
|
||||
// 计算有多少关键词是带有距离信息的
|
||||
numTokensWithLocations := 0
|
||||
for i, t := range table[:len(tokens)] {
|
||||
if len(t.locations[indexPointers[i]]) > 0 {
|
||||
numTokensWithLocations++
|
||||
}
|
||||
}
|
||||
if numTokensWithLocations != len(tokens) {
|
||||
if !countDocsOnly {
|
||||
docs = append(docs, types.IndexedDocument{
|
||||
DocId: baseDocId,
|
||||
})
|
||||
}
|
||||
numDocs++
|
||||
//当某个关键字对应多个文档且有lable关键字存在时,若直接break,将会丢失相当一部分搜索结果
|
||||
continue
|
||||
}
|
||||
|
||||
// 计算搜索键在文档中的紧邻距离
|
||||
tokenProximity, tokenLocations := computeTokenProximity(table[:len(tokens)], indexPointers, tokens)
|
||||
indexedDoc.TokenProximity = int32(tokenProximity)
|
||||
indexedDoc.TokenSnippetLocations = tokenLocations
|
||||
|
||||
// 添加TokenLocations
|
||||
indexedDoc.TokenLocations = make([][]int, len(tokens))
|
||||
for i, t := range table[:len(tokens)] {
|
||||
indexedDoc.TokenLocations[i] = t.locations[indexPointers[i]]
|
||||
}
|
||||
}
|
||||
|
||||
// 当为LocationsIndex或者FrequenciesIndex时计算BM25
|
||||
if indexer.initOptions.IndexType == types.LocationsIndex ||
|
||||
indexer.initOptions.IndexType == types.FrequenciesIndex {
|
||||
bm25 := float32(0)
|
||||
d := indexer.docTokenLengths[baseDocId]
|
||||
for i, t := range table[:len(tokens)] {
|
||||
var frequency float32
|
||||
if indexer.initOptions.IndexType == types.LocationsIndex {
|
||||
frequency = float32(len(t.locations[indexPointers[i]]))
|
||||
} else {
|
||||
frequency = t.frequencies[indexPointers[i]]
|
||||
}
|
||||
|
||||
// 计算BM25
|
||||
if len(t.docIds) > 0 && frequency > 0 && indexer.initOptions.BM25Parameters != nil && avgDocLength != 0 {
|
||||
// 带平滑的idf
|
||||
idf := float32(math.Log2(float64(indexer.numDocuments)/float64(len(t.docIds)) + 1))
|
||||
k1 := indexer.initOptions.BM25Parameters.K1
|
||||
b := indexer.initOptions.BM25Parameters.B
|
||||
bm25 += idf * frequency * (k1 + 1) / (frequency + k1*(1-b+b*d/avgDocLength))
|
||||
}
|
||||
}
|
||||
indexedDoc.BM25 = float32(bm25)
|
||||
}
|
||||
|
||||
indexedDoc.DocId = baseDocId
|
||||
if !countDocsOnly {
|
||||
docs = append(docs, indexedDoc)
|
||||
}
|
||||
numDocs++
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// 二分法查找indices中某文档的索引项
|
||||
// 第一个返回参数为找到的位置或需要插入的位置
|
||||
// 第二个返回参数标明是否找到
|
||||
func (indexer *Indexer) searchIndex(
|
||||
indices *KeywordIndices, start int, end int, docId uint64) (int, bool) {
|
||||
// 特殊情况
|
||||
if indexer.getIndexLength(indices) == start {
|
||||
return start, false
|
||||
}
|
||||
if docId < indexer.getDocId(indices, start) {
|
||||
return start, false
|
||||
} else if docId == indexer.getDocId(indices, start) {
|
||||
return start, true
|
||||
}
|
||||
if docId > indexer.getDocId(indices, end) {
|
||||
return end + 1, false
|
||||
} else if docId == indexer.getDocId(indices, end) {
|
||||
return end, true
|
||||
}
|
||||
|
||||
// 二分
|
||||
var middle int
|
||||
for end-start > 1 {
|
||||
middle = (start + end) / 2
|
||||
if docId == indexer.getDocId(indices, middle) {
|
||||
return middle, true
|
||||
} else if docId > indexer.getDocId(indices, middle) {
|
||||
start = middle
|
||||
} else {
|
||||
end = middle
|
||||
}
|
||||
}
|
||||
return end, false
|
||||
}
|
||||
|
||||
// 计算搜索键在文本中的紧邻距离
|
||||
//
|
||||
// 假定第 i 个搜索键首字节出现在文本中的位置为 P_i,长度 L_i
|
||||
// 紧邻距离计算公式为
|
||||
//
|
||||
// ArgMin(Sum(Abs(P_(i+1) - P_i - L_i)))
|
||||
//
|
||||
// 具体由动态规划实现,依次计算前 i 个 token 在每个出现位置的最优值。
|
||||
// 选定的 P_i 通过 tokenLocations 参数传回。
|
||||
func computeTokenProximity(table []*KeywordIndices, indexPointers []int, tokens []string) (
|
||||
minTokenProximity int, tokenLocations []int) {
|
||||
minTokenProximity = -1
|
||||
tokenLocations = make([]int, len(tokens))
|
||||
|
||||
var (
|
||||
currentLocations, nextLocations []int
|
||||
currentMinValues, nextMinValues []int
|
||||
path [][]int
|
||||
)
|
||||
|
||||
// 初始化路径数组
|
||||
path = make([][]int, len(tokens))
|
||||
for i := 1; i < len(path); i++ {
|
||||
path[i] = make([]int, len(table[i].locations[indexPointers[i]]))
|
||||
}
|
||||
|
||||
// 动态规划
|
||||
currentLocations = table[0].locations[indexPointers[0]]
|
||||
currentMinValues = make([]int, len(currentLocations))
|
||||
for i := 1; i < len(tokens); i++ {
|
||||
nextLocations = table[i].locations[indexPointers[i]]
|
||||
nextMinValues = make([]int, len(nextLocations))
|
||||
for j, _ := range nextMinValues {
|
||||
nextMinValues[j] = -1
|
||||
}
|
||||
|
||||
var iNext int
|
||||
for iCurrent, currentLocation := range currentLocations {
|
||||
if currentMinValues[iCurrent] == -1 {
|
||||
continue
|
||||
}
|
||||
for iNext+1 < len(nextLocations) && nextLocations[iNext+1] < currentLocation {
|
||||
iNext++
|
||||
}
|
||||
|
||||
update := func(from int, to int) {
|
||||
if to >= len(nextLocations) {
|
||||
return
|
||||
}
|
||||
value := currentMinValues[from] + utils.AbsInt(nextLocations[to]-currentLocations[from]-len(tokens[i-1]))
|
||||
if nextMinValues[to] == -1 || value < nextMinValues[to] {
|
||||
nextMinValues[to] = value
|
||||
path[i][to] = from
|
||||
}
|
||||
}
|
||||
|
||||
// 最优解的状态转移只发生在左右最接近的位置
|
||||
update(iCurrent, iNext)
|
||||
update(iCurrent, iNext+1)
|
||||
}
|
||||
|
||||
currentLocations = nextLocations
|
||||
currentMinValues = nextMinValues
|
||||
}
|
||||
|
||||
// 找出最优解
|
||||
var cursor int
|
||||
for i, value := range currentMinValues {
|
||||
if value == -1 {
|
||||
continue
|
||||
}
|
||||
if minTokenProximity == -1 || value < minTokenProximity {
|
||||
minTokenProximity = value
|
||||
cursor = i
|
||||
}
|
||||
}
|
||||
|
||||
// 从路径倒推出最优解的位置
|
||||
for i := len(tokens) - 1; i >= 0; i-- {
|
||||
if i != len(tokens)-1 {
|
||||
cursor = path[i+1][cursor]
|
||||
}
|
||||
tokenLocations[i] = table[i].locations[indexPointers[i]][cursor]
|
||||
}
|
||||
return
|
||||
}
|
||||
106
vendor/github.com/huichen/wukong/core/ranker.go
generated
vendored
Normal file
106
vendor/github.com/huichen/wukong/core/ranker.go
generated
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"github.com/huichen/wukong/types"
|
||||
"github.com/huichen/wukong/utils"
|
||||
"log"
|
||||
"sort"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type Ranker struct {
|
||||
lock struct {
|
||||
sync.RWMutex
|
||||
fields map[uint64]interface{}
|
||||
docs map[uint64]bool
|
||||
}
|
||||
initialized bool
|
||||
}
|
||||
|
||||
func (ranker *Ranker) Init() {
|
||||
if ranker.initialized == true {
|
||||
log.Fatal("排序器不能初始化两次")
|
||||
}
|
||||
ranker.initialized = true
|
||||
|
||||
ranker.lock.fields = make(map[uint64]interface{})
|
||||
ranker.lock.docs = make(map[uint64]bool)
|
||||
}
|
||||
|
||||
// 给某个文档添加评分字段
|
||||
func (ranker *Ranker) AddDoc(docId uint64, fields interface{}) {
|
||||
if ranker.initialized == false {
|
||||
log.Fatal("排序器尚未初始化")
|
||||
}
|
||||
|
||||
ranker.lock.Lock()
|
||||
ranker.lock.fields[docId] = fields
|
||||
ranker.lock.docs[docId] = true
|
||||
ranker.lock.Unlock()
|
||||
}
|
||||
|
||||
// 删除某个文档的评分字段
|
||||
func (ranker *Ranker) RemoveDoc(docId uint64) {
|
||||
if ranker.initialized == false {
|
||||
log.Fatal("排序器尚未初始化")
|
||||
}
|
||||
|
||||
ranker.lock.Lock()
|
||||
delete(ranker.lock.fields, docId)
|
||||
delete(ranker.lock.docs, docId)
|
||||
ranker.lock.Unlock()
|
||||
}
|
||||
|
||||
// 给文档评分并排序
|
||||
func (ranker *Ranker) Rank(
|
||||
docs []types.IndexedDocument, options types.RankOptions, countDocsOnly bool) (types.ScoredDocuments, int) {
|
||||
if ranker.initialized == false {
|
||||
log.Fatal("排序器尚未初始化")
|
||||
}
|
||||
|
||||
// 对每个文档评分
|
||||
var outputDocs types.ScoredDocuments
|
||||
numDocs := 0
|
||||
for _, d := range docs {
|
||||
ranker.lock.RLock()
|
||||
// 判断doc是否存在
|
||||
if _, ok := ranker.lock.docs[d.DocId]; ok {
|
||||
fs := ranker.lock.fields[d.DocId]
|
||||
ranker.lock.RUnlock()
|
||||
// 计算评分并剔除没有分值的文档
|
||||
scores := options.ScoringCriteria.Score(d, fs)
|
||||
if len(scores) > 0 {
|
||||
if !countDocsOnly {
|
||||
outputDocs = append(outputDocs, types.ScoredDocument{
|
||||
DocId: d.DocId,
|
||||
Scores: scores,
|
||||
TokenSnippetLocations: d.TokenSnippetLocations,
|
||||
TokenLocations: d.TokenLocations})
|
||||
}
|
||||
numDocs++
|
||||
}
|
||||
} else {
|
||||
ranker.lock.RUnlock()
|
||||
}
|
||||
}
|
||||
|
||||
// 排序
|
||||
if !countDocsOnly {
|
||||
if options.ReverseOrder {
|
||||
sort.Sort(sort.Reverse(outputDocs))
|
||||
} else {
|
||||
sort.Sort(outputDocs)
|
||||
}
|
||||
// 当用户要求只返回部分结果时返回部分结果
|
||||
var start, end int
|
||||
if options.MaxOutputs != 0 {
|
||||
start = utils.MinInt(options.OutputOffset, len(outputDocs))
|
||||
end = utils.MinInt(options.OutputOffset+options.MaxOutputs, len(outputDocs))
|
||||
} else {
|
||||
start = utils.MinInt(options.OutputOffset, len(outputDocs))
|
||||
end = len(outputDocs)
|
||||
}
|
||||
return outputDocs[start:end], numDocs
|
||||
}
|
||||
return outputDocs, numDocs
|
||||
}
|
||||
35
vendor/github.com/huichen/wukong/core/test_utils.go
generated
vendored
Normal file
35
vendor/github.com/huichen/wukong/core/test_utils.go
generated
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/huichen/wukong/types"
|
||||
)
|
||||
|
||||
func indicesToString(indexer *Indexer, token string) (output string) {
|
||||
if indices, ok := indexer.tableLock.table[token]; ok {
|
||||
for i := 0; i < indexer.getIndexLength(indices); i++ {
|
||||
output += fmt.Sprintf("%d ",
|
||||
indexer.getDocId(indices, i))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func indexedDocsToString(docs []types.IndexedDocument, numDocs int) (output string) {
|
||||
for _, doc := range docs {
|
||||
output += fmt.Sprintf("[%d %d %v] ",
|
||||
doc.DocId, doc.TokenProximity, doc.TokenSnippetLocations)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func scoredDocsToString(docs []types.ScoredDocument) (output string) {
|
||||
for _, doc := range docs {
|
||||
output += fmt.Sprintf("[%d [", doc.DocId)
|
||||
for _, score := range doc.Scores {
|
||||
output += fmt.Sprintf("%d ", int(score*1000))
|
||||
}
|
||||
output += "]] "
|
||||
}
|
||||
return
|
||||
}
|
||||
13
vendor/github.com/huichen/wukong/engine/counters.go
generated
vendored
Normal file
13
vendor/github.com/huichen/wukong/engine/counters.go
generated
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
package engine
|
||||
|
||||
func (engine *Engine) NumTokenIndexAdded() uint64 {
|
||||
return engine.numTokenIndexAdded
|
||||
}
|
||||
|
||||
func (engine *Engine) NumDocumentsIndexed() uint64 {
|
||||
return engine.numDocumentsIndexed
|
||||
}
|
||||
|
||||
func (engine *Engine) NumDocumentsRemoved() uint64 {
|
||||
return engine.numDocumentsRemoved
|
||||
}
|
||||
446
vendor/github.com/huichen/wukong/engine/engine.go
generated
vendored
Normal file
446
vendor/github.com/huichen/wukong/engine/engine.go
generated
vendored
Normal file
@@ -0,0 +1,446 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/huichen/murmur"
|
||||
"github.com/huichen/sego"
|
||||
"github.com/huichen/wukong/core"
|
||||
"github.com/huichen/wukong/storage"
|
||||
"github.com/huichen/wukong/types"
|
||||
"github.com/huichen/wukong/utils"
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
NumNanosecondsInAMillisecond = 1000000
|
||||
PersistentStorageFilePrefix = "wukong"
|
||||
)
|
||||
|
||||
type Engine struct {
|
||||
// 计数器,用来统计有多少文档被索引等信息
|
||||
numDocumentsIndexed uint64
|
||||
numDocumentsRemoved uint64
|
||||
numDocumentsForceUpdated uint64
|
||||
numIndexingRequests uint64
|
||||
numRemovingRequests uint64
|
||||
numForceUpdatingRequests uint64
|
||||
numTokenIndexAdded uint64
|
||||
numDocumentsStored uint64
|
||||
|
||||
// 记录初始化参数
|
||||
initOptions types.EngineInitOptions
|
||||
initialized bool
|
||||
|
||||
indexers []core.Indexer
|
||||
rankers []core.Ranker
|
||||
segmenter sego.Segmenter
|
||||
stopTokens StopTokens
|
||||
dbs []storage.Storage
|
||||
|
||||
// 建立索引器使用的通信通道
|
||||
segmenterChannel chan segmenterRequest
|
||||
indexerAddDocChannels []chan indexerAddDocumentRequest
|
||||
indexerRemoveDocChannels []chan indexerRemoveDocRequest
|
||||
rankerAddDocChannels []chan rankerAddDocRequest
|
||||
|
||||
// 建立排序器使用的通信通道
|
||||
indexerLookupChannels []chan indexerLookupRequest
|
||||
rankerRankChannels []chan rankerRankRequest
|
||||
rankerRemoveDocChannels []chan rankerRemoveDocRequest
|
||||
|
||||
// 建立持久存储使用的通信通道
|
||||
persistentStorageIndexDocumentChannels []chan persistentStorageIndexDocumentRequest
|
||||
persistentStorageInitChannel chan bool
|
||||
}
|
||||
|
||||
func (engine *Engine) Init(options types.EngineInitOptions) {
|
||||
// 将线程数设置为CPU数
|
||||
runtime.GOMAXPROCS(runtime.NumCPU())
|
||||
|
||||
// 初始化初始参数
|
||||
if engine.initialized {
|
||||
log.Fatal("请勿重复初始化引擎")
|
||||
}
|
||||
options.Init()
|
||||
engine.initOptions = options
|
||||
engine.initialized = true
|
||||
|
||||
if !options.NotUsingSegmenter {
|
||||
// 载入分词器词典
|
||||
engine.segmenter.LoadDictionary(options.SegmenterDictionaries)
|
||||
|
||||
// 初始化停用词
|
||||
engine.stopTokens.Init(options.StopTokenFile)
|
||||
}
|
||||
|
||||
// 初始化索引器和排序器
|
||||
for shard := 0; shard < options.NumShards; shard++ {
|
||||
engine.indexers = append(engine.indexers, core.Indexer{})
|
||||
engine.indexers[shard].Init(*options.IndexerInitOptions)
|
||||
|
||||
engine.rankers = append(engine.rankers, core.Ranker{})
|
||||
engine.rankers[shard].Init()
|
||||
}
|
||||
|
||||
// 初始化分词器通道
|
||||
engine.segmenterChannel = make(
|
||||
chan segmenterRequest, options.NumSegmenterThreads)
|
||||
|
||||
// 初始化索引器通道
|
||||
engine.indexerAddDocChannels = make(
|
||||
[]chan indexerAddDocumentRequest, options.NumShards)
|
||||
engine.indexerRemoveDocChannels = make(
|
||||
[]chan indexerRemoveDocRequest, options.NumShards)
|
||||
engine.indexerLookupChannels = make(
|
||||
[]chan indexerLookupRequest, options.NumShards)
|
||||
for shard := 0; shard < options.NumShards; shard++ {
|
||||
engine.indexerAddDocChannels[shard] = make(
|
||||
chan indexerAddDocumentRequest,
|
||||
options.IndexerBufferLength)
|
||||
engine.indexerRemoveDocChannels[shard] = make(
|
||||
chan indexerRemoveDocRequest,
|
||||
options.IndexerBufferLength)
|
||||
engine.indexerLookupChannels[shard] = make(
|
||||
chan indexerLookupRequest,
|
||||
options.IndexerBufferLength)
|
||||
}
|
||||
|
||||
// 初始化排序器通道
|
||||
engine.rankerAddDocChannels = make(
|
||||
[]chan rankerAddDocRequest, options.NumShards)
|
||||
engine.rankerRankChannels = make(
|
||||
[]chan rankerRankRequest, options.NumShards)
|
||||
engine.rankerRemoveDocChannels = make(
|
||||
[]chan rankerRemoveDocRequest, options.NumShards)
|
||||
for shard := 0; shard < options.NumShards; shard++ {
|
||||
engine.rankerAddDocChannels[shard] = make(
|
||||
chan rankerAddDocRequest,
|
||||
options.RankerBufferLength)
|
||||
engine.rankerRankChannels[shard] = make(
|
||||
chan rankerRankRequest,
|
||||
options.RankerBufferLength)
|
||||
engine.rankerRemoveDocChannels[shard] = make(
|
||||
chan rankerRemoveDocRequest,
|
||||
options.RankerBufferLength)
|
||||
}
|
||||
|
||||
// 初始化持久化存储通道
|
||||
if engine.initOptions.UsePersistentStorage {
|
||||
engine.persistentStorageIndexDocumentChannels =
|
||||
make([]chan persistentStorageIndexDocumentRequest,
|
||||
engine.initOptions.PersistentStorageShards)
|
||||
for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
|
||||
engine.persistentStorageIndexDocumentChannels[shard] = make(
|
||||
chan persistentStorageIndexDocumentRequest)
|
||||
}
|
||||
engine.persistentStorageInitChannel = make(
|
||||
chan bool, engine.initOptions.PersistentStorageShards)
|
||||
}
|
||||
|
||||
// 启动分词器
|
||||
for iThread := 0; iThread < options.NumSegmenterThreads; iThread++ {
|
||||
go engine.segmenterWorker()
|
||||
}
|
||||
|
||||
// 启动索引器和排序器
|
||||
for shard := 0; shard < options.NumShards; shard++ {
|
||||
go engine.indexerAddDocumentWorker(shard)
|
||||
go engine.indexerRemoveDocWorker(shard)
|
||||
go engine.rankerAddDocWorker(shard)
|
||||
go engine.rankerRemoveDocWorker(shard)
|
||||
|
||||
for i := 0; i < options.NumIndexerThreadsPerShard; i++ {
|
||||
go engine.indexerLookupWorker(shard)
|
||||
}
|
||||
for i := 0; i < options.NumRankerThreadsPerShard; i++ {
|
||||
go engine.rankerRankWorker(shard)
|
||||
}
|
||||
}
|
||||
|
||||
// 启动持久化存储工作协程
|
||||
if engine.initOptions.UsePersistentStorage {
|
||||
err := os.MkdirAll(engine.initOptions.PersistentStorageFolder, 0700)
|
||||
if err != nil {
|
||||
log.Fatal("无法创建目录", engine.initOptions.PersistentStorageFolder)
|
||||
}
|
||||
|
||||
// 打开或者创建数据库
|
||||
engine.dbs = make([]storage.Storage, engine.initOptions.PersistentStorageShards)
|
||||
for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
|
||||
dbPath := engine.initOptions.PersistentStorageFolder + "/" + PersistentStorageFilePrefix + "." + strconv.Itoa(shard)
|
||||
db, err := storage.OpenStorage(dbPath)
|
||||
if db == nil || err != nil {
|
||||
log.Fatal("无法打开数据库", dbPath, ": ", err)
|
||||
}
|
||||
engine.dbs[shard] = db
|
||||
}
|
||||
|
||||
// 从数据库中恢复
|
||||
for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
|
||||
go engine.persistentStorageInitWorker(shard)
|
||||
}
|
||||
|
||||
// 等待恢复完成
|
||||
for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
|
||||
<-engine.persistentStorageInitChannel
|
||||
}
|
||||
for {
|
||||
runtime.Gosched()
|
||||
if engine.numIndexingRequests == engine.numDocumentsIndexed {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// 关闭并重新打开数据库
|
||||
for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
|
||||
engine.dbs[shard].Close()
|
||||
dbPath := engine.initOptions.PersistentStorageFolder + "/" + PersistentStorageFilePrefix + "." + strconv.Itoa(shard)
|
||||
db, err := storage.OpenStorage(dbPath)
|
||||
if db == nil || err != nil {
|
||||
log.Fatal("无法打开数据库", dbPath, ": ", err)
|
||||
}
|
||||
engine.dbs[shard] = db
|
||||
}
|
||||
|
||||
for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
|
||||
go engine.persistentStorageIndexDocumentWorker(shard)
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&engine.numDocumentsStored, engine.numIndexingRequests)
|
||||
}
|
||||
|
||||
// 将文档加入索引
|
||||
//
|
||||
// 输入参数:
|
||||
// docId 标识文档编号,必须唯一,docId == 0 表示非法文档(用于强制刷新索引),[1, +oo) 表示合法文档
|
||||
// data 见DocumentIndexData注释
|
||||
// forceUpdate 是否强制刷新 cache,如果设为 true,则尽快添加到索引,否则等待 cache 满之后一次全量添加
|
||||
//
|
||||
// 注意:
|
||||
// 1. 这个函数是线程安全的,请尽可能并发调用以提高索引速度
|
||||
// 2. This call is asynchronous, i.e. when it returns the document may not yet have been
//    added to the index, so calling Search immediately afterwards may not find it.
//    Call FlushIndex to force the index to be flushed.
func (engine *Engine) IndexDocument(docId uint64, data types.DocumentIndexData, forceUpdate bool) {
    engine.internalIndexDocument(docId, data, forceUpdate)

    hash := murmur.Murmur3([]byte(fmt.Sprintf("%d", docId))) % uint32(engine.initOptions.PersistentStorageShards)
    if engine.initOptions.UsePersistentStorage && docId != 0 {
        engine.persistentStorageIndexDocumentChannels[hash] <- persistentStorageIndexDocumentRequest{docId: docId, data: data}
    }
}

func (engine *Engine) internalIndexDocument(
    docId uint64, data types.DocumentIndexData, forceUpdate bool) {
    if !engine.initialized {
        log.Fatal("必须先初始化引擎")
    }

    if docId != 0 {
        atomic.AddUint64(&engine.numIndexingRequests, 1)
    }
    if forceUpdate {
        atomic.AddUint64(&engine.numForceUpdatingRequests, 1)
    }
    hash := murmur.Murmur3([]byte(fmt.Sprintf("%d%s", docId, data.Content)))
    engine.segmenterChannel <- segmenterRequest{
        docId: docId, hash: hash, data: data, forceUpdate: forceUpdate}
}

// Removes a document from the index.
//
// Arguments:
//  docId       document identifier, must be unique; docId == 0 marks an invalid document
//              (used to force-flush the index), [1, +oo) marks a valid document
//  forceUpdate whether to force-flush the cache; if true the document is removed from the
//              index as soon as possible, otherwise deletions are batched until the cache is full
//
// Notes:
// 1. This function is thread-safe; call it concurrently to speed up indexing.
// 2. This call is asynchronous, i.e. when it returns the document may not yet have been
//    removed from the index, so calling Search immediately afterwards may still find it.
//    Call FlushIndex to force the index to be flushed.
func (engine *Engine) RemoveDocument(docId uint64, forceUpdate bool) {
    if !engine.initialized {
        log.Fatal("必须先初始化引擎")
    }

    if docId != 0 {
        atomic.AddUint64(&engine.numRemovingRequests, 1)
    }
    if forceUpdate {
        atomic.AddUint64(&engine.numForceUpdatingRequests, 1)
    }
    for shard := 0; shard < engine.initOptions.NumShards; shard++ {
        engine.indexerRemoveDocChannels[shard] <- indexerRemoveDocRequest{docId: docId, forceUpdate: forceUpdate}
        if docId == 0 {
            continue
        }
        engine.rankerRemoveDocChannels[shard] <- rankerRemoveDocRequest{docId: docId}
    }

    if engine.initOptions.UsePersistentStorage && docId != 0 {
        // Remove the document from the database as well.
        hash := murmur.Murmur3([]byte(fmt.Sprintf("%d", docId))) % uint32(engine.initOptions.PersistentStorageShards)
        go engine.persistentStorageRemoveDocumentWorker(docId, hash)
    }
}

// Finds the documents matching the search request. This function is thread-safe.
func (engine *Engine) Search(request types.SearchRequest) (output types.SearchResponse) {
    if !engine.initialized {
        log.Fatal("必须先初始化引擎")
    }

    var rankOptions types.RankOptions
    if request.RankOptions == nil {
        rankOptions = *engine.initOptions.DefaultRankOptions
    } else {
        rankOptions = *request.RankOptions
    }
    if rankOptions.ScoringCriteria == nil {
        rankOptions.ScoringCriteria = engine.initOptions.DefaultRankOptions.ScoringCriteria
    }

    // Collect the query keywords.
    tokens := []string{}
    if request.Text != "" {
        querySegments := engine.segmenter.Segment([]byte(request.Text))
        for _, s := range querySegments {
            token := s.Token().Text()
            if !engine.stopTokens.IsStopToken(token) {
                tokens = append(tokens, s.Token().Text())
            }
        }
    } else {
        for _, t := range request.Tokens {
            tokens = append(tokens, t)
        }
    }

    // Create the channel on which the rankers return their results.
    rankerReturnChannel := make(
        chan rankerReturnRequest, engine.initOptions.NumShards)

    // Build the lookup request.
    lookupRequest := indexerLookupRequest{
        countDocsOnly:       request.CountDocsOnly,
        tokens:              tokens,
        labels:              request.Labels,
        docIds:              request.DocIds,
        options:             rankOptions,
        rankerReturnChannel: rankerReturnChannel,
        orderless:           request.Orderless,
    }

    // Send the lookup request to every indexer shard.
    for shard := 0; shard < engine.initOptions.NumShards; shard++ {
        engine.indexerLookupChannels[shard] <- lookupRequest
    }

    // Read the rankers' output from the channel.
    numDocs := 0
    rankOutput := types.ScoredDocuments{}
    timeout := request.Timeout
    isTimeout := false
    if timeout <= 0 {
        // No timeout.
        for shard := 0; shard < engine.initOptions.NumShards; shard++ {
            rankerOutput := <-rankerReturnChannel
            if !request.CountDocsOnly {
                for _, doc := range rankerOutput.docs {
                    rankOutput = append(rankOutput, doc)
                }
            }
            numDocs += rankerOutput.numDocs
        }
    } else {
        // With timeout.
        deadline := time.Now().Add(time.Nanosecond * time.Duration(NumNanosecondsInAMillisecond*request.Timeout))
        for shard := 0; shard < engine.initOptions.NumShards; shard++ {
            select {
            case rankerOutput := <-rankerReturnChannel:
                if !request.CountDocsOnly {
                    for _, doc := range rankerOutput.docs {
                        rankOutput = append(rankOutput, doc)
                    }
                }
                numDocs += rankerOutput.numDocs
            case <-time.After(deadline.Sub(time.Now())):
                isTimeout = true
                break
            }
        }
    }

    // Final sort.
    if !request.CountDocsOnly && !request.Orderless {
        if rankOptions.ReverseOrder {
            sort.Sort(sort.Reverse(rankOutput))
        } else {
            sort.Sort(rankOutput)
        }
    }

    // Prepare the output.
    output.Tokens = tokens
    // output.Docs is only filled when CountDocsOnly is false.
    if !request.CountDocsOnly {
        if request.Orderless {
            // In orderless mode there is no need to truncate at the offset.
            output.Docs = rankOutput
        } else {
            var start, end int
            if rankOptions.MaxOutputs == 0 {
                start = utils.MinInt(rankOptions.OutputOffset, len(rankOutput))
                end = len(rankOutput)
            } else {
                start = utils.MinInt(rankOptions.OutputOffset, len(rankOutput))
                end = utils.MinInt(start+rankOptions.MaxOutputs, len(rankOutput))
            }
            output.Docs = rankOutput[start:end]
        }
    }
    output.NumDocs = numDocs
    output.Timeout = isTimeout
    return
}

// Blocks until all pending index additions have been processed.
func (engine *Engine) FlushIndex() {
    for {
        runtime.Gosched()
        if engine.numIndexingRequests == engine.numDocumentsIndexed &&
            engine.numRemovingRequests*uint64(engine.initOptions.NumShards) == engine.numDocumentsRemoved &&
            (!engine.initOptions.UsePersistentStorage || engine.numIndexingRequests == engine.numDocumentsStored) {
            // Make sure all requests in the channels have been processed.
            break
        }
    }
    // Force an update, guaranteed to be the last request.
    engine.IndexDocument(0, types.DocumentIndexData{}, true)
    for {
        runtime.Gosched()
        if engine.numForceUpdatingRequests*uint64(engine.initOptions.NumShards) == engine.numDocumentsForceUpdated {
            return
        }
    }
}

// Shuts the engine down.
func (engine *Engine) Close() {
    engine.FlushIndex()
    if engine.initOptions.UsePersistentStorage {
        for _, db := range engine.dbs {
            db.Close()
        }
    }
}

// Maps a text hash to the shard it is assigned to.
func (engine *Engine) getShard(hash uint32) int {
    return int(hash - hash/uint32(engine.initOptions.NumShards)*uint32(engine.initOptions.NumShards))
}
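The comments on IndexDocument, RemoveDocument, Search and FlushIndex above describe an asynchronous pipeline: indexing and removal only enqueue requests, and FlushIndex blocks until they drain. A minimal usage sketch, assuming the Engine.Init method defined earlier in engine.go and placeholder dictionary/stop-word paths:

package main

import (
    "fmt"

    "github.com/huichen/wukong/engine"
    "github.com/huichen/wukong/types"
)

func main() {
    var searcher engine.Engine
    // dict.txt and stop.txt are placeholder paths, not files shipped with wukong.
    searcher.Init(types.EngineInitOptions{
        SegmenterDictionaries: "dict.txt",
        StopTokenFile:         "stop.txt",
    })
    defer searcher.Close()

    // Indexing is asynchronous; docIds start at 1 (0 is reserved for force-flush).
    searcher.IndexDocument(1, types.DocumentIndexData{Content: "此次发布包含全文搜索功能"}, false)
    searcher.IndexDocument(2, types.DocumentIndexData{Content: "悟空引擎支持中文分词"}, false)

    // Block until both documents are visible to Search.
    searcher.FlushIndex()

    response := searcher.Search(types.SearchRequest{Text: "全文搜索"})
    fmt.Println("matched", response.NumDocs, "documents, tokens:", response.Tokens)
}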
101
vendor/github.com/huichen/wukong/engine/indexer_worker.go
generated
vendored
Normal file
101
vendor/github.com/huichen/wukong/engine/indexer_worker.go
generated
vendored
Normal file
@@ -0,0 +1,101 @@
package engine

import (
    "github.com/huichen/wukong/types"
    "sync/atomic"
)

type indexerAddDocumentRequest struct {
    document    *types.DocumentIndex
    forceUpdate bool
}

type indexerLookupRequest struct {
    countDocsOnly       bool
    tokens              []string
    labels              []string
    docIds              map[uint64]bool
    options             types.RankOptions
    rankerReturnChannel chan rankerReturnRequest
    orderless           bool
}

type indexerRemoveDocRequest struct {
    docId       uint64
    forceUpdate bool
}

func (engine *Engine) indexerAddDocumentWorker(shard int) {
    for {
        request := <-engine.indexerAddDocChannels[shard]
        engine.indexers[shard].AddDocumentToCache(request.document, request.forceUpdate)
        if request.document != nil {
            atomic.AddUint64(&engine.numTokenIndexAdded,
                uint64(len(request.document.Keywords)))
            atomic.AddUint64(&engine.numDocumentsIndexed, 1)
        }
        if request.forceUpdate {
            atomic.AddUint64(&engine.numDocumentsForceUpdated, 1)
        }
    }
}

func (engine *Engine) indexerRemoveDocWorker(shard int) {
    for {
        request := <-engine.indexerRemoveDocChannels[shard]
        engine.indexers[shard].RemoveDocumentToCache(request.docId, request.forceUpdate)
        if request.docId != 0 {
            atomic.AddUint64(&engine.numDocumentsRemoved, 1)
        }
        if request.forceUpdate {
            atomic.AddUint64(&engine.numDocumentsForceUpdated, 1)
        }
    }
}

func (engine *Engine) indexerLookupWorker(shard int) {
    for {
        request := <-engine.indexerLookupChannels[shard]

        var docs []types.IndexedDocument
        var numDocs int
        if request.docIds == nil {
            docs, numDocs = engine.indexers[shard].Lookup(request.tokens, request.labels, nil, request.countDocsOnly)
        } else {
            docs, numDocs = engine.indexers[shard].Lookup(request.tokens, request.labels, request.docIds, request.countDocsOnly)
        }

        if request.countDocsOnly {
            request.rankerReturnChannel <- rankerReturnRequest{numDocs: numDocs}
            continue
        }

        if len(docs) == 0 {
            request.rankerReturnChannel <- rankerReturnRequest{}
            continue
        }

        if request.orderless {
            var outputDocs []types.ScoredDocument
            for _, d := range docs {
                outputDocs = append(outputDocs, types.ScoredDocument{
                    DocId:                 d.DocId,
                    TokenSnippetLocations: d.TokenSnippetLocations,
                    TokenLocations:        d.TokenLocations})
            }
            request.rankerReturnChannel <- rankerReturnRequest{
                docs:    outputDocs,
                numDocs: len(outputDocs),
            }
            continue
        }

        rankerRequest := rankerRankRequest{
            countDocsOnly:       request.countDocsOnly,
            docs:                docs,
            options:             request.options,
            rankerReturnChannel: request.rankerReturnChannel,
        }
        engine.rankerRankChannels[shard] <- rankerRequest
    }
}
66
vendor/github.com/huichen/wukong/engine/persistent_storage_worker.go
generated
vendored
Normal file
66
vendor/github.com/huichen/wukong/engine/persistent_storage_worker.go
generated
vendored
Normal file
@@ -0,0 +1,66 @@
package engine

import (
    "bytes"
    "encoding/binary"
    "encoding/gob"
    "github.com/huichen/wukong/types"
    "sync/atomic"
)

type persistentStorageIndexDocumentRequest struct {
    docId uint64
    data  types.DocumentIndexData
}

func (engine *Engine) persistentStorageIndexDocumentWorker(shard int) {
    for {
        request := <-engine.persistentStorageIndexDocumentChannels[shard]

        // Build the key.
        b := make([]byte, 10)
        length := binary.PutUvarint(b, request.docId)

        // Build the value.
        var buf bytes.Buffer
        enc := gob.NewEncoder(&buf)
        err := enc.Encode(request.data)
        if err != nil {
            atomic.AddUint64(&engine.numDocumentsStored, 1)
            continue
        }

        // Write the key-value pair to the database.
        engine.dbs[shard].Set(b[0:length], buf.Bytes())
        atomic.AddUint64(&engine.numDocumentsStored, 1)
    }
}

func (engine *Engine) persistentStorageRemoveDocumentWorker(docId uint64, shard uint32) {
    // Build the key.
    b := make([]byte, 10)
    length := binary.PutUvarint(b, docId)

    // Delete the key from the database.
    engine.dbs[shard].Delete(b[0:length])
}

func (engine *Engine) persistentStorageInitWorker(shard int) {
    engine.dbs[shard].ForEach(func(k, v []byte) error {
        key, value := k, v
        // Recover the docId.
        docId, _ := binary.Uvarint(key)

        // Recover the data.
        buf := bytes.NewReader(value)
        dec := gob.NewDecoder(buf)
        var data types.DocumentIndexData
        err := dec.Decode(&data)
        if err == nil {
            // Rebuild the index entry.
            engine.internalIndexDocument(docId, data, false)
        }
        return nil
    })
    engine.persistentStorageInitChannel <- true
}
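The worker above fixes the on-disk layout: the key is the docId encoded as an unsigned varint, the value is the gob encoding of DocumentIndexData, and persistentStorageInitWorker reverses both at startup. A standalone sketch of that round trip, using a stand-in struct instead of the real types package:

package main

import (
    "bytes"
    "encoding/binary"
    "encoding/gob"
    "fmt"
)

// docData stands in for types.DocumentIndexData in this sketch.
type docData struct {
    Content string
    Labels  []string
}

func main() {
    // Encode, as persistentStorageIndexDocumentWorker does.
    key := make([]byte, 10)
    n := binary.PutUvarint(key, 42)
    var value bytes.Buffer
    gob.NewEncoder(&value).Encode(docData{Content: "示例文档", Labels: []string{"示例"}})

    // Decode, as persistentStorageInitWorker does at startup.
    docId, _ := binary.Uvarint(key[:n])
    var decoded docData
    gob.NewDecoder(bytes.NewReader(value.Bytes())).Decode(&decoded)
    fmt.Println(docId, decoded.Content, decoded.Labels)
}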
52
vendor/github.com/huichen/wukong/engine/ranker_worker.go
generated
vendored
Normal file
52
vendor/github.com/huichen/wukong/engine/ranker_worker.go
generated
vendored
Normal file
@@ -0,0 +1,52 @@
package engine

import (
    "github.com/huichen/wukong/types"
)

type rankerAddDocRequest struct {
    docId  uint64
    fields interface{}
}

type rankerRankRequest struct {
    docs                []types.IndexedDocument
    options             types.RankOptions
    rankerReturnChannel chan rankerReturnRequest
    countDocsOnly       bool
}

type rankerReturnRequest struct {
    docs    types.ScoredDocuments
    numDocs int
}

type rankerRemoveDocRequest struct {
    docId uint64
}

func (engine *Engine) rankerAddDocWorker(shard int) {
    for {
        request := <-engine.rankerAddDocChannels[shard]
        engine.rankers[shard].AddDoc(request.docId, request.fields)
    }
}

func (engine *Engine) rankerRankWorker(shard int) {
    for {
        request := <-engine.rankerRankChannels[shard]
        if request.options.MaxOutputs != 0 {
            request.options.MaxOutputs += request.options.OutputOffset
        }
        request.options.OutputOffset = 0
        outputDocs, numDocs := engine.rankers[shard].Rank(request.docs, request.options, request.countDocsOnly)
        request.rankerReturnChannel <- rankerReturnRequest{docs: outputDocs, numDocs: numDocs}
    }
}

func (engine *Engine) rankerRemoveDocWorker(shard int) {
    for {
        request := <-engine.rankerRemoveDocChannels[shard]
        engine.rankers[shard].RemoveDoc(request.docId)
    }
}
97
vendor/github.com/huichen/wukong/engine/segmenter_worker.go
generated
vendored
Normal file
97
vendor/github.com/huichen/wukong/engine/segmenter_worker.go
generated
vendored
Normal file
@@ -0,0 +1,97 @@
package engine

import (
    "github.com/huichen/wukong/types"
)

type segmenterRequest struct {
    docId       uint64
    hash        uint32
    data        types.DocumentIndexData
    forceUpdate bool
}

func (engine *Engine) segmenterWorker() {
    for {
        request := <-engine.segmenterChannel
        if request.docId == 0 {
            if request.forceUpdate {
                for i := 0; i < engine.initOptions.NumShards; i++ {
                    engine.indexerAddDocChannels[i] <- indexerAddDocumentRequest{forceUpdate: true}
                }
            }
            continue
        }

        shard := engine.getShard(request.hash)
        tokensMap := make(map[string][]int)
        numTokens := 0
        if !engine.initOptions.NotUsingSegmenter && request.data.Content != "" {
            // When the document body is not empty, keywords come from segmenting the content.
            segments := engine.segmenter.Segment([]byte(request.data.Content))
            for _, segment := range segments {
                token := segment.Token().Text()
                if !engine.stopTokens.IsStopToken(token) {
                    tokensMap[token] = append(tokensMap[token], segment.Start())
                }
            }
            numTokens = len(segments)
        } else {
            // Otherwise load the keywords supplied by the caller.
            for _, t := range request.data.Tokens {
                if !engine.stopTokens.IsStopToken(t.Text) {
                    tokensMap[t.Text] = t.Locations
                }
            }
            numTokens = len(request.data.Tokens)
        }

        // Add the document labels, which are not part of the segmented text.
        for _, label := range request.data.Labels {
            if !engine.initOptions.NotUsingSegmenter {
                if !engine.stopTokens.IsStopToken(label) {
                    // If the keyword already appears in the body, skipping this check
                    // would discard its location information.
                    if _, ok := tokensMap[label]; !ok {
                        tokensMap[label] = []int{}
                    }
                }
            } else {
                // If the keyword already appears in the body, skipping this check
                // would discard its location information.
                if _, ok := tokensMap[label]; !ok {
                    tokensMap[label] = []int{}
                }
            }
        }

        indexerRequest := indexerAddDocumentRequest{
            document: &types.DocumentIndex{
                DocId:       request.docId,
                TokenLength: float32(numTokens),
                Keywords:    make([]types.KeywordIndex, len(tokensMap)),
            },
            forceUpdate: request.forceUpdate,
        }
        iTokens := 0
        for k, v := range tokensMap {
            indexerRequest.document.Keywords[iTokens] = types.KeywordIndex{
                Text: k,
                // Labels added outside segmentation have no locations, so their
                // frequency is 0 and they do not take part in the tf-idf computation.
                Frequency: float32(len(v)),
                Starts:    v}
            iTokens++
        }

        engine.indexerAddDocChannels[shard] <- indexerRequest
        if request.forceUpdate {
            for i := 0; i < engine.initOptions.NumShards; i++ {
                if i == shard {
                    continue
                }
                engine.indexerAddDocChannels[i] <- indexerAddDocumentRequest{forceUpdate: true}
            }
        }
        rankerRequest := rankerAddDocRequest{
            docId: request.docId, fields: request.data.Fields}
        engine.rankerAddDocChannels[shard] <- rankerRequest
    }
}
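The label loop above only inserts a label when the keyword is not already in tokensMap, so positions found in the document body are never overwritten, and label-only keywords keep an empty location list (hence a frequency of 0). A stripped-down sketch of that merge:

package main

import "fmt"

func main() {
    tokensMap := map[string][]int{
        "搜索": {0, 12}, // positions produced by the segmenter
    }
    labels := []string{"搜索", "引擎"} // document labels

    for _, label := range labels {
        if _, ok := tokensMap[label]; !ok {
            tokensMap[label] = []int{} // label-only keyword, frequency stays 0
        }
    }
    fmt.Println(tokensMap) // 搜索 keeps [0 12], 引擎 gets an empty location list
}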
40
vendor/github.com/huichen/wukong/engine/stop_tokens.go
generated
vendored
Normal file
40
vendor/github.com/huichen/wukong/engine/stop_tokens.go
generated
vendored
Normal file
@@ -0,0 +1,40 @@
package engine

import (
    "bufio"
    "log"
    "os"
)

type StopTokens struct {
    stopTokens map[string]bool
}

// Reads the stop words from stopTokenFile, one word per line.
// These stop words are skipped when the document index is built.
func (st *StopTokens) Init(stopTokenFile string) {
    st.stopTokens = make(map[string]bool)
    if stopTokenFile == "" {
        return
    }

    file, err := os.Open(stopTokenFile)
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        text := scanner.Text()
        if text != "" {
            st.stopTokens[text] = true
        }
    }
}

func (st *StopTokens) IsStopToken(token string) bool {
    _, found := st.stopTokens[token]
    return found
}
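A short usage sketch of StopTokens; stop_tokens.txt is a placeholder path for a file with one stop word per line (Init calls log.Fatal if the file cannot be opened):

package main

import (
    "fmt"

    "github.com/huichen/wukong/engine"
)

func main() {
    var st engine.StopTokens
    st.Init("stop_tokens.txt") // placeholder path
    fmt.Println(st.IsStopToken("的"), st.IsStopToken("搜索"))
}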
13
vendor/github.com/huichen/wukong/license.txt
generated
vendored
Normal file
13
vendor/github.com/huichen/wukong/license.txt
generated
vendored
Normal file
@@ -0,0 +1,13 @@
Copyright 2013 Hui Chen

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
69
vendor/github.com/huichen/wukong/storage/bolt_storage.go
generated
vendored
Normal file
69
vendor/github.com/huichen/wukong/storage/bolt_storage.go
generated
vendored
Normal file
@@ -0,0 +1,69 @@
package storage

import (
    "github.com/boltdb/bolt"
    "time"
)

var wukong_documents = []byte("wukong_documents")

type boltStorage struct {
    db *bolt.DB
}

func openBoltStorage(path string) (Storage, error) {
    db, err := bolt.Open(path, 0600, &bolt.Options{Timeout: 3600 * time.Second})
    if err != nil {
        return nil, err
    }
    err = db.Update(func(tx *bolt.Tx) error {
        _, err := tx.CreateBucketIfNotExists(wukong_documents)
        return err
    })
    if err != nil {
        db.Close()
        return nil, err
    }
    return &boltStorage{db}, nil
}

func (s *boltStorage) WALName() string {
    return s.db.Path()
}

func (s *boltStorage) Set(k []byte, v []byte) error {
    return s.db.Update(func(tx *bolt.Tx) error {
        return tx.Bucket(wukong_documents).Put(k, v)
    })
}

func (s *boltStorage) Get(k []byte) (b []byte, err error) {
    err = s.db.View(func(tx *bolt.Tx) error {
        b = tx.Bucket(wukong_documents).Get(k)
        return nil
    })
    return
}

func (s *boltStorage) Delete(k []byte) error {
    return s.db.Update(func(tx *bolt.Tx) error {
        return tx.Bucket(wukong_documents).Delete(k)
    })
}

func (s *boltStorage) ForEach(fn func(k, v []byte) error) error {
    return s.db.View(func(tx *bolt.Tx) error {
        b := tx.Bucket(wukong_documents)
        c := b.Cursor()
        for k, v := c.First(); k != nil; k, v = c.Next() {
            if err := fn(k, v); err != nil {
                return err
            }
        }
        return nil
    })
}

func (s *boltStorage) Close() error {
    return s.db.Close()
}
64
vendor/github.com/huichen/wukong/storage/kv_storage.go
generated
vendored
Normal file
64
vendor/github.com/huichen/wukong/storage/kv_storage.go
generated
vendored
Normal file
@@ -0,0 +1,64 @@
package storage

import (
    "github.com/cznic/kv"
    "io"
)

type kvStorage struct {
    db *kv.DB
}

func openKVStorage(path string) (Storage, error) {
    options := &kv.Options{}
    db, errOpen := kv.Open(path, options)
    if errOpen != nil {
        var errCreate error
        db, errCreate = kv.Create(path, options)
        if errCreate != nil {
            return &kvStorage{db}, errCreate
        }
    }
    return &kvStorage{db}, nil
}

func (s *kvStorage) WALName() string {
    return s.db.WALName()
}

func (s *kvStorage) Set(k []byte, v []byte) error {
    return s.db.Set(k, v)
}

func (s *kvStorage) Get(k []byte) ([]byte, error) {
    return s.db.Get(nil, k)
}

func (s *kvStorage) Delete(k []byte) error {
    return s.db.Delete(k)
}

func (s *kvStorage) ForEach(fn func(k, v []byte) error) error {
    iter, err := s.db.SeekFirst()
    if err == io.EOF {
        return nil
    } else if err != nil {
        return err
    }
    for {
        key, value, err := iter.Next()
        if err == io.EOF {
            break
        } else if err != nil {
            return err
        }
        if err := fn(key, value); err != nil {
            return err
        }
    }
    return nil
}

func (s *kvStorage) Close() error {
    return s.db.Close()
}
37
vendor/github.com/huichen/wukong/storage/storage.go
generated
vendored
Normal file
37
vendor/github.com/huichen/wukong/storage/storage.go
generated
vendored
Normal file
@@ -0,0 +1,37 @@
package storage

import (
    "fmt"
    "os"
)

const DEFAULT_STORAGE_ENGINE = "bolt"

var supportedStorage = map[string]func(path string) (Storage, error){
    "kv":   openKVStorage,
    "bolt": openBoltStorage,
}

func RegisterStorageEngine(name string, fn func(path string) (Storage, error)) {
    supportedStorage[name] = fn
}

type Storage interface {
    Set(k, v []byte) error
    Get(k []byte) ([]byte, error)
    Delete(k []byte) error
    ForEach(fn func(k, v []byte) error) error
    Close() error
    WALName() string
}

func OpenStorage(path string) (Storage, error) {
    wse := os.Getenv("WUKONG_STORAGE_ENGINE")
    if wse == "" {
        wse = DEFAULT_STORAGE_ENGINE
    }
    if fn, has := supportedStorage[wse]; has {
        return fn(path)
    }
    return nil, fmt.Errorf("unsupported storage engine %v", wse)
}
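OpenStorage picks the engine named by the WUKONG_STORAGE_ENGINE environment variable, falling back to "bolt", and every backend satisfies the same Storage interface. A hedged usage sketch with a placeholder database path:

package main

import (
    "fmt"
    "log"
    "os"

    "github.com/huichen/wukong/storage"
)

func main() {
    // Select the kv backend instead of the default "bolt"; the path is a placeholder.
    os.Setenv("WUKONG_STORAGE_ENGINE", "kv")
    db, err := storage.OpenStorage("/tmp/wukong.test")
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    db.Set([]byte("k"), []byte("v"))
    db.ForEach(func(k, v []byte) error {
        fmt.Printf("%s=%s\n", k, v)
        return nil
    })
}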
27
vendor/github.com/huichen/wukong/types/document_index_data.go
generated
vendored
Normal file
27
vendor/github.com/huichen/wukong/types/document_index_data.go
generated
vendored
Normal file
@@ -0,0 +1,27 @@
package types

type DocumentIndexData struct {
    // The full document text (must be UTF-8); the keywords to be indexed are generated from it.
    Content string

    // The document's keywords.
    // When Content is not empty, keywords are derived from Content by segmentation.
    // Tokens exists so that wukong's built-in segmenter can be bypassed and
    // segmentation and preprocessing can be done outside the engine.
    Tokens []TokenData

    // Document labels (must be UTF-8), e.g. the document's category. Labels do not
    // appear in the document text itself.
    Labels []string

    // The document's scoring fields; any struct type is accepted.
    Fields interface{}
}

// One keyword of a document.
type TokenData struct {
    // The keyword string.
    Text string

    // Byte positions in the document at which the keyword starts.
    Locations []int
}
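A document can be fed to the engine either as raw Content, which the engine segments itself, or as pre-segmented Tokens, with Labels as extra search keys in both cases. A small sketch of the two variants:

package main

import "github.com/huichen/wukong/types"

func main() {
    // Let the engine segment the content itself.
    byContent := types.DocumentIndexData{
        Content: "悟空是一个全文搜索引擎",
        Labels:  []string{"搜索"},
    }

    // Bypass the built-in segmenter with externally produced tokens.
    byTokens := types.DocumentIndexData{
        Tokens: []types.TokenData{
            {Text: "全文", Locations: []int{0}},
            {Text: "搜索", Locations: []int{6}},
        },
    }
    _, _ = byContent, byTokens
}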
126
vendor/github.com/huichen/wukong/types/engine_init_options.go
generated
vendored
Normal file
126
vendor/github.com/huichen/wukong/types/engine_init_options.go
generated
vendored
Normal file
@@ -0,0 +1,126 @@
package types

import (
    "log"
    "runtime"
)

var (
    // Default values for EngineInitOptions.
    defaultNumSegmenterThreads       = runtime.NumCPU()
    defaultNumShards                 = 2
    defaultIndexerBufferLength       = runtime.NumCPU()
    defaultNumIndexerThreadsPerShard = runtime.NumCPU()
    defaultRankerBufferLength        = runtime.NumCPU()
    defaultNumRankerThreadsPerShard  = runtime.NumCPU()
    defaultDefaultRankOptions        = RankOptions{
        ScoringCriteria: RankByBM25{},
    }
    defaultIndexerInitOptions = IndexerInitOptions{
        IndexType:      FrequenciesIndex,
        BM25Parameters: &defaultBM25Parameters,
    }
    defaultBM25Parameters = BM25Parameters{
        K1: 2.0,
        B:  0.75,
    }
    defaultPersistentStorageShards = 8
)

type EngineInitOptions struct {
    // Whether to use the built-in segmenter.
    // The segmenter is used by default; set this to true if you do not need
    // segmentation inside the engine, in which case SegmenterDictionaries and
    // StopTokenFile are skipped during startup.
    // Note: without the segmenter, the Content field of DocumentIndexData is
    // ignored when IndexDocument is called.
    NotUsingSegmenter bool

    // Comma-separated dictionary files; see the comment on
    // sego.Segmenter.LoadDictionary for details.
    SegmenterDictionaries string

    // Stop word file.
    StopTokenFile string

    // Number of segmenter threads.
    NumSegmenterThreads int

    // Number of indexer and ranker shards.
    // Documents are distributed evenly across the shards for lookup and ranking.
    NumShards int

    // Channel buffer length of the indexers.
    IndexerBufferLength int

    // Number of threads per indexer shard.
    NumIndexerThreadsPerShard int

    // Channel buffer length of the rankers.
    RankerBufferLength int

    // Number of threads per ranker shard.
    NumRankerThreadsPerShard int

    // Indexer initialization options.
    IndexerInitOptions *IndexerInitOptions

    // Default search options.
    DefaultRankOptions *RankOptions

    // Whether to use persistent storage, plus the directory holding the database
    // files and the number of storage shards.
    UsePersistentStorage    bool
    PersistentStorageFolder string
    PersistentStorageShards int
}

// Initializes EngineInitOptions, replacing unset options with their default values.
func (options *EngineInitOptions) Init() {
    if !options.NotUsingSegmenter {
        if options.SegmenterDictionaries == "" {
            log.Fatal("字典文件不能为空")
        }
    }

    if options.NumSegmenterThreads == 0 {
        options.NumSegmenterThreads = defaultNumSegmenterThreads
    }

    if options.NumShards == 0 {
        options.NumShards = defaultNumShards
    }

    if options.IndexerBufferLength == 0 {
        options.IndexerBufferLength = defaultIndexerBufferLength
    }

    if options.NumIndexerThreadsPerShard == 0 {
        options.NumIndexerThreadsPerShard = defaultNumIndexerThreadsPerShard
    }

    if options.RankerBufferLength == 0 {
        options.RankerBufferLength = defaultRankerBufferLength
    }

    if options.NumRankerThreadsPerShard == 0 {
        options.NumRankerThreadsPerShard = defaultNumRankerThreadsPerShard
    }

    if options.IndexerInitOptions == nil {
        options.IndexerInitOptions = &defaultIndexerInitOptions
    }

    if options.IndexerInitOptions.BM25Parameters == nil {
        options.IndexerInitOptions.BM25Parameters = &defaultBM25Parameters
    }

    if options.DefaultRankOptions == nil {
        options.DefaultRankOptions = &defaultDefaultRankOptions
    }

    if options.DefaultRankOptions.ScoringCriteria == nil {
        options.DefaultRankOptions.ScoringCriteria = defaultDefaultRankOptions.ScoringCriteria
    }

    if options.PersistentStorageShards == 0 {
        options.PersistentStorageShards = defaultPersistentStorageShards
    }
}
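Init only fills in fields that were left at their zero value, so a caller typically sets the dictionary (and whatever else it cares about) and lets the defaults cover the rest. A sketch, with a placeholder dictionary path:

package main

import (
    "fmt"

    "github.com/huichen/wukong/types"
)

func main() {
    // Only the dictionary is set; Init fills in the documented defaults
    // (NumShards=2, PersistentStorageShards=8, BM25 K1=2.0/B=0.75, ...).
    options := types.EngineInitOptions{SegmenterDictionaries: "dict.txt"} // placeholder path
    options.Init()
    fmt.Println(options.NumShards, options.PersistentStorageShards, options.IndexerInitOptions.BM25Parameters.K1)
}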
70
vendor/github.com/huichen/wukong/types/index.go
generated
vendored
Normal file
70
vendor/github.com/huichen/wukong/types/index.go
generated
vendored
Normal file
@@ -0,0 +1,70 @@
package types

type DocumentIndex struct {
    // The document's DocId.
    DocId uint64

    // The document's keyword length.
    TokenLength float32

    // The index keys to add.
    Keywords []KeywordIndex
}

// One entry of the inverted index; it effectively marks a (search key, document) pair.
type KeywordIndex struct {
    // UTF-8 text of the search key.
    Text string

    // Frequency of the search key.
    Frequency float32

    // Starting byte positions of the search key in the document, in ascending order.
    Starts []int
}

// Result returned by the indexer.
type IndexedDocument struct {
    DocId uint64

    // BM25 score; only valid when the index type is FrequenciesIndex or LocationsIndex.
    BM25 float32

    // Proximity of the keywords in the document; see the comment on
    // computeTokenProximity for its meaning.
    // Only valid when the index type is LocationsIndex.
    TokenProximity int32

    // Keyword positions derived from the proximity computation; the slice has the
    // same length as the tokens passed to Lookup and corresponds to them one by one.
    // Only valid when the index type is LocationsIndex.
    TokenSnippetLocations []int

    // Exact positions of the keywords in the text.
    // Only valid when the index type is LocationsIndex.
    TokenLocations [][]int
}

// Convenience type for adding document indices in batches.
type DocumentsIndex []*DocumentIndex

func (docs DocumentsIndex) Len() int {
    return len(docs)
}
func (docs DocumentsIndex) Swap(i, j int) {
    docs[i], docs[j] = docs[j], docs[i]
}
func (docs DocumentsIndex) Less(i, j int) bool {
    return docs[i].DocId < docs[j].DocId
}

// Convenience type for removing document indices in batches.
type DocumentsId []uint64

func (docs DocumentsId) Len() int {
    return len(docs)
}
func (docs DocumentsId) Swap(i, j int) {
    docs[i], docs[j] = docs[j], docs[i]
}
func (docs DocumentsId) Less(i, j int) bool {
    return docs[i] < docs[j]
}
42
vendor/github.com/huichen/wukong/types/indexer_init_options.go
generated
vendored
Normal file
42
vendor/github.com/huichen/wukong/types/indexer_init_options.go
generated
vendored
Normal file
@@ -0,0 +1,42 @@
package types

// These constants define what the inverted index table stores.
const (
    // Only the docIds of the documents are stored.
    DocIdsIndex = 0

    // Keyword frequencies are stored, which are needed to compute BM25.
    FrequenciesIndex = 1

    // The exact byte positions (possibly several) at which a keyword appears in
    // the document are stored. LocationsIndex is required if you want keyword
    // proximity data.
    LocationsIndex = 2

    // Default cache size for documents waiting to be inserted into the index table.
    defaultDocCacheSize = 300000
)

// Indexer initialization options.
type IndexerInitOptions struct {
    // The index table type, see the constants above.
    IndexType int

    // Cache size for documents waiting to be inserted into the index table.
    DocCacheSize int

    // BM25 parameters.
    BM25Parameters *BM25Parameters
}

// See http://en.wikipedia.org/wiki/Okapi_BM25
// Default values are in engine_init_options.go
type BM25Parameters struct {
    K1 float32
    B  float32
}

func (options *IndexerInitOptions) Init() {
    if options.DocCacheSize == 0 {
        options.DocCacheSize = defaultDocCacheSize
    }
}
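K1 and B are the two free parameters of Okapi BM25 referenced above: K1 controls how quickly repeated term occurrences saturate, B controls document-length normalization. A sketch of the standard single-term formula (the engine's own indexer may differ in details):

package main

import (
    "fmt"
    "math"
)

// bm25 computes IDF(q) * f*(K1+1) / (f + K1*(1 - B + B*docLen/avgDocLen)),
// where f is the term frequency in the document, docLen the document length
// and avgDocLen the average document length in the corpus.
func bm25(idf, tf, docLen, avgDocLen, k1, b float64) float64 {
    return idf * tf * (k1 + 1) / (tf + k1*(1-b+b*docLen/avgDocLen))
}

func main() {
    idf := math.Log(1000.0 / 42.0) // toy corpus: 1000 docs, 42 contain the term
    fmt.Printf("%.3f\n", bm25(idf, 3, 120, 100, 2.0, 0.75))
}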
17
vendor/github.com/huichen/wukong/types/scoring_criteria.go
generated
vendored
Normal file
17
vendor/github.com/huichen/wukong/types/scoring_criteria.go
generated
vendored
Normal file
@@ -0,0 +1,17 @@
package types

// Common interface for scoring rules.
type ScoringCriteria interface {
    // Scores a document. When documents are sorted, the first score is compared
    // first; if the scores are equal the second score is compared, and so on.
    // Returning an empty slice removes the document from the final result.
    Score(doc IndexedDocument, fields interface{}) []float32
}

// A simple scoring rule: the document score is its BM25 value.
type RankByBM25 struct {
}

func (rule RankByBM25) Score(doc IndexedDocument, fields interface{}) []float32 {
    return []float32{doc.BM25}
}
45
vendor/github.com/huichen/wukong/types/search_request.go
generated
vendored
Normal file
45
vendor/github.com/huichen/wukong/types/search_request.go
generated
vendored
Normal file
@@ -0,0 +1,45 @@
package types

type SearchRequest struct {
    // The search phrase (must be UTF-8); it will be segmented.
    // When it is the empty string, the keywords are read from Tokens below.
    Text string

    // The keywords (must be UTF-8); Text takes precedence when it is not empty.
    // Normally you do not need to supply keywords yourself unless you run your
    // own segmenter.
    Tokens []string

    // Document labels (must be UTF-8). Labels do not appear in the document text
    // but are still a kind of search key.
    Labels []string

    // When not nil, the search only considers the keys contained in DocIds
    // (the values are ignored).
    DocIds map[uint64]bool

    // Ranking options.
    RankOptions *RankOptions

    // Timeout in milliseconds. No timeout is set when the value is <= 0.
    // When the search times out, partial ranked results may still be returned.
    Timeout int

    // When true, only the number of matching documents is counted and no
    // documents are returned.
    CountDocsOnly bool

    // Do not sort; useful when sorting is done outside the engine (e.g. in the
    // client). Enabling it saves noticeable time when many documents are returned.
    Orderless bool
}

type RankOptions struct {
    // Scoring rule for documents; when nil, the rule set at Engine initialization is used.
    ScoringCriteria ScoringCriteria

    // By default (ReverseOrder == false) documents are sorted by descending score,
    // otherwise by ascending score.
    ReverseOrder bool

    // Index of the first result to output.
    OutputOffset int

    // Maximum number of results to output; 0 means no limit.
    MaxOutputs int
}
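OutputOffset and MaxOutputs give simple result paging, and Timeout bounds the search in milliseconds. A sketch of a request for the second page of ten results:

package main

import "github.com/huichen/wukong/types"

func main() {
    // Second page of ten results, with a 100 ms search timeout.
    request := types.SearchRequest{
        Text:    "全文搜索",
        Timeout: 100,
        RankOptions: &types.RankOptions{
            OutputOffset: 10,
            MaxOutputs:   10,
        },
    }
    _ = request
}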
57
vendor/github.com/huichen/wukong/types/search_response.go
generated
vendored
Normal file
57
vendor/github.com/huichen/wukong/types/search_response.go
generated
vendored
Normal file
@@ -0,0 +1,57 @@
package types

import (
    "github.com/huichen/wukong/utils"
)

type SearchResponse struct {
    // The keywords used in the search.
    Tokens []string

    // The documents found, already sorted.
    Docs []ScoredDocument

    // Whether the search timed out; partial results may still be returned on timeout.
    Timeout bool

    // The number of documents found. Note that this counts all matching documents,
    // which may be larger than the number of documents returned.
    NumDocs int
}

type ScoredDocument struct {
    DocId uint64

    // The document's scores.
    // Results are sorted by Scores: first by the first value, then by the second
    // value if the first values are equal, and so on.
    Scores []float32

    // Byte positions in the text of the keywords used for snippet generation; the
    // slice has the same length as SearchResponse.Tokens.
    // Only non-empty when IndexType == LocationsIndex.
    TokenSnippetLocations []int

    // Positions at which the keywords appear.
    // Only non-empty when IndexType == LocationsIndex.
    TokenLocations [][]int
}

// For convenient sorting.

type ScoredDocuments []ScoredDocument

func (docs ScoredDocuments) Len() int {
    return len(docs)
}
func (docs ScoredDocuments) Swap(i, j int) {
    docs[i], docs[j] = docs[j], docs[i]
}
func (docs ScoredDocuments) Less(i, j int) bool {
    // To sort from large to small, this actually implements "More".
    for iScore := 0; iScore < utils.MinInt(len(docs[i].Scores), len(docs[j].Scores)); iScore++ {
        if docs[i].Scores[iScore] > docs[j].Scores[iScore] {
            return true
        } else if docs[i].Scores[iScore] < docs[j].Scores[iScore] {
            return false
        }
    }
    return len(docs[i].Scores) > len(docs[j].Scores)
}
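Because Less is written as "more", sorting ScoredDocuments with the standard library yields descending scores, and a longer score vector wins ties. A small sketch:

package main

import (
    "fmt"
    "sort"

    "github.com/huichen/wukong/types"
)

func main() {
    docs := types.ScoredDocuments{
        {DocId: 1, Scores: []float32{0.4}},
        {DocId: 2, Scores: []float32{0.9}},
        {DocId: 3, Scores: []float32{0.9, 0.1}}, // ties with DocId 2 on the first score
    }
    // Less is "More", so a plain sort.Sort gives descending order: 3, 2, 1.
    sort.Sort(docs)
    for _, d := range docs {
        fmt.Println(d.DocId, d.Scores)
    }
}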
13
vendor/github.com/huichen/wukong/utils/test_utils.go
generated
vendored
Normal file
13
vendor/github.com/huichen/wukong/utils/test_utils.go
generated
vendored
Normal file
@@ -0,0 +1,13 @@
package utils

import (
    "fmt"
    "testing"
)

func Expect(t *testing.T, expect string, actual interface{}) {
    actualString := fmt.Sprint(actual)
    if expect != actualString {
        t.Errorf("期待值=\"%s\", 实际=\"%s\"", expect, actualString)
    }
}
15
vendor/github.com/huichen/wukong/utils/utils.go
generated
vendored
Normal file
15
vendor/github.com/huichen/wukong/utils/utils.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
package utils

func AbsInt(a int) int {
    if a < 0 {
        return -a
    }
    return a
}

func MinInt(a, b int) int {
    if a < b {
        return a
    }
    return b
}